metadata = blob.getMetadata();
+ switch (activity) {
+ case Acquire:
+ // Add owner in metadata
+ metadata.put(AzureStorageCheckpointLeaseManager.METADATA_OWNER_NAME, lease.getOwner());
+ break;
+
+ case Release:
+ // Remove owner in metadata
+ metadata.remove(AzureStorageCheckpointLeaseManager.METADATA_OWNER_NAME);
+ break;
+
+ default:
+ // Should never get here, but passing the metadata through unchanged is harmless.
+ break;
+ }
+ blob.setMetadata(metadata);
+ blob.uploadMetadata(condition, options, null);
+ }
+ // else don't touch metadata
+ }
+
+ private boolean wasLeaseLost(StorageException se, String partitionId) {
+ boolean retval = false;
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "WAS LEASE LOST? Http " + se.getHttpStatusCode()));
+ if (se.getExtendedErrorInformation() != null) {
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId,
+ "Http " + se.getExtendedErrorInformation().getErrorCode() + " :: " + se.getExtendedErrorInformation().getErrorMessage()));
+ }
+ if ((se.getHttpStatusCode() == 409) || // conflict
+ (se.getHttpStatusCode() == 412)) // precondition failed
+ {
+ StorageExtendedErrorInformation extendedErrorInfo = se.getExtendedErrorInformation();
+ if (extendedErrorInfo != null) {
+ String errorCode = extendedErrorInfo.getErrorCode();
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "Error code: " + errorCode));
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "Error message: " + extendedErrorInfo.getErrorMessage()));
+ if ((errorCode.compareTo(StorageErrorCodeStrings.LEASE_LOST) == 0) ||
+ (errorCode.compareTo(StorageErrorCodeStrings.LEASE_ID_MISMATCH_WITH_LEASE_OPERATION) == 0) ||
+ (errorCode.compareTo(StorageErrorCodeStrings.LEASE_ID_MISMATCH_WITH_BLOB_OPERATION) == 0) ||
+ (errorCode.compareTo(StorageErrorCodeStrings.LEASE_ALREADY_PRESENT) == 0)) {
+ retval = true;
+ }
+ }
+ }
+ return retval;
+ }
+
+ // Kinds of lease upload. NOTE(review): the metadata switch earlier in this class appears to key on this enum
+ // (Acquire writes the owner, Release removes it, anything else leaves metadata alone) -- confirm the parameter type.
+ private enum UploadActivity {Create, Acquire, Release, Update}
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/AzureStoragePartitionManagerOptions.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/AzureStoragePartitionManagerOptions.java
new file mode 100644
index 0000000000000..0b877ff7024f3
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/AzureStoragePartitionManagerOptions.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * PartitionManagerOptions variant that rejects lease durations above the maximum
+ * Azure Storage blob lease of 60 seconds.
+ */
+public final class AzureStoragePartitionManagerOptions extends PartitionManagerOptions {
+    // Max Azure Storage blob lease, in seconds.
+    private static final int MAX_BLOB_LEASE_SECONDS = 60;
+
+    public AzureStoragePartitionManagerOptions() {
+    }
+
+    /**
+     * Sets the lease duration after checking it against the blob lease maximum.
+     *
+     * @param duration requested lease duration in seconds
+     * @throws IllegalArgumentException if duration is greater than 60
+     */
+    @Override
+    public void setLeaseDurationInSeconds(int duration) {
+        final boolean withinLimit = duration <= MAX_BLOB_LEASE_SECONDS;
+        if (!withinLimit) {
+            throw new IllegalArgumentException("Lease duration cannot be more than 60 seconds");
+        }
+        super.setLeaseDurationInSeconds(duration);
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/BaseLease.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/BaseLease.java
new file mode 100644
index 0000000000000..3ca7cb1d95861
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/BaseLease.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * BaseLease class is public so that advanced users can implement an ILeaseManager.
+ * Unless you are implementing ILeaseManager you should not have to deal with objects
+ * of this class or derived classes directly.
+ *
+ * This lightweight base exists to allow ILeaseManager.getAllLeases to operate as quickly
+ * as possible -- for some lease manager implementations, loading the entire contents of a
+ * lease from the store may be expensive. BaseLease contains only the minimum amount of
+ * information required to allow PartitionScanner to operate.
+ *
+ * Note that a Lease object just carries information about a partition lease. The APIs
+ * to acquire/renew/release a lease are all on ILeaseManager.
+ */
+public class BaseLease implements Comparable<BaseLease> {
+    private final String partitionId;
+    private String owner = "";
+    private transient boolean isOwned = false; // do not serialize
+
+    /**
+     * Do not use; added only for GSon deserializer
+     */
+    protected BaseLease() {
+        partitionId = "-1";
+    }
+
+    /**
+     * Create a BaseLease for the given partition.
+     *
+     * @param partitionId Partition id for this lease.
+     */
+    public BaseLease(String partitionId) {
+        this.partitionId = partitionId;
+    }
+
+    /**
+     * Create and populate a BaseLease for the given partition.
+     *
+     * @param partitionId Partition id for this lease.
+     * @param owner       Current owner of this lease, or empty.
+     * @param isOwned     True if the lease is owned, false if not.
+     */
+    public BaseLease(String partitionId, String owner, boolean isOwned) {
+        this.partitionId = partitionId;
+        this.owner = owner;
+        this.isOwned = isOwned;
+    }
+
+    /**
+     * Create a BaseLease by duplicating the given Lease.
+     *
+     * @param source BaseLease to clone.
+     */
+    public BaseLease(BaseLease source) {
+        this.partitionId = source.partitionId;
+        this.owner = source.owner;
+        this.isOwned = source.isOwned;
+    }
+
+    /**
+     * The owner of a lease is the name of the EventProcessorHost instance which currently holds the lease.
+     *
+     * @return name of the owning instance
+     */
+    public String getOwner() {
+        return this.owner;
+    }
+
+    /**
+     * Set the owner string. Used when a host steals a lease.
+     *
+     * @param owner name of the new owning instance
+     */
+    public void setOwner(String owner) {
+        this.owner = owner;
+    }
+
+    /**
+     * Set the owned state of the lease.
+     *
+     * @param newState true if the lease is owned, or false if it is not
+     */
+    public void setIsOwned(boolean newState) {
+        this.isOwned = newState;
+    }
+
+    /**
+     * Get the owned state of the lease.
+     *
+     * @return true if the lease is owned, or false if it is not
+     */
+    public boolean getIsOwned() {
+        return this.isOwned;
+    }
+
+    /**
+     * Convenience function for comparing possibleOwner against this.owner
+     *
+     * @param possibleOwner name to check; null never matches
+     * @return true if possibleOwner is the same as this.owner, false otherwise (including when either is null)
+     */
+    public boolean isOwnedBy(String possibleOwner) {
+        // equals() rather than compareTo(): a null argument yields false instead of a NullPointerException.
+        return (this.owner != null) && this.owner.equals(possibleOwner);
+    }
+
+    /**
+     * Returns the id of the partition that this Lease is for. Immutable so there is no corresponding setter.
+     *
+     * @return partition id
+     */
+    public String getPartitionId() {
+        return this.partitionId;
+    }
+
+    /**
+     * Orders leases by the String ordering of their partition ids; owner and owned state are ignored.
+     *
+     * @param other lease to compare against
+     * @return negative, zero, or positive as this partition id sorts before, equal to, or after the other's
+     */
+    @Override
+    public int compareTo(BaseLease other) {
+        return this.partitionId.compareTo(other.getPartitionId());
+    }
+
+    // Debug-only description of lease state; the base class has none to report.
+    String getStateDebug() {
+        return "N/A";
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Checkpoint.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Checkpoint.java
new file mode 100644
index 0000000000000..1180420df3a41
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Checkpoint.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.impl.ClientConstants;
+
+/**
+ * Checkpoint class is public so that advanced users can implement an ICheckpointManager.
+ * Unless you are implementing ICheckpointManager you should not have to deal with objects
+ * of this class directly.
+ *
+ * A Checkpoint is essentially just a tuple. It has a fixed partition id, set at creation time
+ * and immutable thereafter, and associates that with an offset/sequenceNumber pair which
+ * indicates a position within the events in that partition.
+ */
+public class Checkpoint {
+    private final String partitionId;
+    private String offset;
+    private long sequenceNumber;
+
+    /**
+     * Build a checkpoint positioned at the start of the stream (start-of-stream offset, sequence number 0).
+     *
+     * @param partitionId Associated partition.
+     */
+    public Checkpoint(String partitionId) {
+        this(partitionId, ClientConstants.START_OF_STREAM, 0);
+    }
+
+    /**
+     * Build a checkpoint at an explicit position. The offset and sequence number must describe the
+     * same event; taking both from the system properties of a single EventData instance is the
+     * safest way to guarantee that.
+     *
+     * @param partitionId    Associated partition.
+     * @param offset         Offset in the stream.
+     * @param sequenceNumber Sequence number in the stream.
+     */
+    public Checkpoint(String partitionId, String offset, long sequenceNumber) {
+        this.partitionId = partitionId;
+        this.offset = offset;
+        this.sequenceNumber = sequenceNumber;
+    }
+
+    /**
+     * Copy constructor.
+     *
+     * @param source Existing checkpoint to clone.
+     */
+    public Checkpoint(Checkpoint source) {
+        this(source.partitionId, source.offset, source.sequenceNumber);
+    }
+
+    /**
+     * @return the current offset value.
+     */
+    public String getOffset() {
+        return offset;
+    }
+
+    /**
+     * Replace the offset. The sequence number should be updated along with it.
+     *
+     * @param newOffset the new value for offset in the stream.
+     */
+    public void setOffset(String newOffset) {
+        offset = newOffset;
+    }
+
+    /**
+     * @return the current sequence number.
+     */
+    public long getSequenceNumber() {
+        return sequenceNumber;
+    }
+
+    /**
+     * Replace the sequence number. The offset should be updated along with it.
+     *
+     * @param newSequenceNumber the new value for sequence number.
+     */
+    public void setSequenceNumber(long newSequenceNumber) {
+        sequenceNumber = newSequenceNumber;
+    }
+
+    /**
+     * The partition id is fixed at construction, so there is no setter.
+     *
+     * @return the associated partition id.
+     */
+    public String getPartitionId() {
+        return partitionId;
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Closable.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Closable.java
new file mode 100644
index 0000000000000..2d9fa0bff6e39
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/Closable.java
@@ -0,0 +1,58 @@
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * Tracks a two-stage shutdown state (closing, then closed) for an object with an optional parent.
+ * An object reports itself closing/closed when either its own flags or its parent's say so.
+ * This object's own flags are only read and written while holding a private lock.
+ */
+class Closable {
+    private final Object syncClose;
+    private final Closable parent; // null for top-level
+    private boolean isClosing;
+    private boolean isClosed;
+
+    /**
+     * @param parent object whose state this one also reports, or null for a top-level object
+     */
+    Closable(Closable parent) {
+        this.syncClose = new Object();
+        this.parent = parent;
+        this.isClosing = false;
+        this.isClosed = false;
+    }
+
+    /**
+     * @return true if this object or its parent chain has been marked closed
+     */
+    protected final boolean getIsClosed() {
+        // The parent is queried before taking our own lock, so only one lock is held at a time.
+        final boolean isParentClosed = this.parent != null && this.parent.getIsClosed();
+        synchronized (this.syncClose) {
+            return isParentClosed || this.isClosed;
+        }
+    }
+
+    /**
+     * @return true if this object or its parent chain is closing or closed
+     */
+    protected final boolean getIsClosingOrClosed() {
+        final boolean isParentClosingOrClosed = this.parent != null && this.parent.getIsClosingOrClosed();
+        synchronized (this.syncClose) {
+            return isParentClosingOrClosed || this.isClosing || this.isClosed;
+        }
+    }
+
+    /** Marks this object as closing. The parent is not modified. */
+    protected final void setClosing() {
+        synchronized (this.syncClose) {
+            this.isClosing = true;
+        }
+    }
+
+    /** Marks this object as closed and clears its closing flag. The parent is not modified. */
+    protected final void setClosed() {
+        synchronized (this.syncClose) {
+            this.isClosing = false;
+            this.isClosed = true;
+        }
+    }
+
+    /**
+     * Guard for operations that must not start during or after shutdown.
+     *
+     * @param message text for the exception
+     * @throws ClosingException if this object or its parent chain is closing or closed
+     */
+    protected final void throwIfClosingOrClosed(String message) {
+        if (getIsClosingOrClosed()) {
+            throw new ClosingException(message);
+        }
+    }
+
+    /**
+     * Thrown by throwIfClosingOrClosed. Declared static so an exception instance does not hold a
+     * reference to the Closable that threw it.
+     */
+    static class ClosingException extends RuntimeException {
+        private static final long serialVersionUID = 1138985585921317036L;
+
+        ClosingException(String message) {
+            super(message);
+        }
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CloseReason.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CloseReason.java
new file mode 100644
index 0000000000000..4fb4387be2f5b
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CloseReason.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * Reason passed to an event processor when it is closed. Used when implementing
+ * IEventProcessor: one argument to onClose is this enum.
+ */
+public enum CloseReason {
+ /**
+ * The IEventProcessor is closing because the lease on the partition has been lost.
+ */
+ LeaseLost,
+
+ /**
+ * The IEventProcessor is closing because the event processor host is being shut down,
+ * or because an error has occurred.
+ */
+ Shutdown
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CompleteLease.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CompleteLease.java
new file mode 100644
index 0000000000000..f58cec14f9877
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/CompleteLease.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * CompleteLease class is public so that advanced users can implement an ILeaseManager.
+ * Unless you are implementing ILeaseManager you should not have to deal with objects
+ * of this class or derived classes directly.
+ *
+ * CompleteLease carries around complete information about a lease. By itself, it has the
+ * epoch. Any lease manager implementation can derive from this class to add data which
+ * the lease manager needs to function -- see AzureBlobLease for an example. Having two
+ * distinct classes allows the code to clearly express which variety of lease any variable
+ * holds or a method requires, and avoids the problem of accidentally supplying a lightweight
+ * BaseLease to a method which needs the lease-manager-specific fields.
+ */
+public class CompleteLease extends BaseLease {
+    protected long epoch = -1; // start with illegal epoch
+
+    /**
+     * Present only so the GSon deserializer can instantiate the class; not for general use.
+     */
+    protected CompleteLease() {
+    }
+
+    /**
+     * Build a CompleteLease for one partition; the epoch starts at the illegal value -1.
+     *
+     * @param partitionId Partition id for this lease.
+     */
+    public CompleteLease(String partitionId) {
+        super(partitionId);
+    }
+
+    /**
+     * Copy constructor: duplicates the base lease fields and the epoch.
+     *
+     * @param source Lease to clone.
+     */
+    public CompleteLease(CompleteLease source) {
+        super(source);
+        epoch = source.epoch;
+    }
+
+    /**
+     * Epoch is an Event Hub receiver concept. Creating a receiver on a partition with a higher epoch
+     * than the existing receiver forcibly disconnects the existing one; creating one with a lower epoch
+     * fails. The lease carries the epoch so a host that steals the lease can open a receiver with a
+     * higher epoch.
+     *
+     * @return the epoch of the current receiver
+     */
+    public long getEpoch() {
+        return epoch;
+    }
+
+    /**
+     * Overwrite the epoch, e.g. after creating a new receiver with a higher epoch.
+     *
+     * @param epoch updated epoch value
+     */
+    public void setEpoch(long epoch) {
+        this.epoch = epoch;
+    }
+
+    /**
+     * Adds one to the epoch, replacing the get-increment-set sequence otherwise needed after
+     * stealing a lease.
+     *
+     * @return The new value of the epoch.
+     */
+    public long incrementEpoch() {
+        return ++epoch;
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/DefaultEventProcessorFactory.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/DefaultEventProcessorFactory.java
new file mode 100644
index 0000000000000..99e578be6da59
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/DefaultEventProcessorFactory.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+
+/**
+ * Factory that creates event processors by instantiating a configured class through its
+ * no-argument constructor.
+ *
+ * @param <T> concrete event processor type produced by this factory
+ */
+class DefaultEventProcessorFactory<T extends IEventProcessor> implements IEventProcessorFactory<T> {
+    private Class<T> eventProcessorClass = null;
+
+    /**
+     * Sets the class to instantiate. Must be called before createEventProcessor.
+     *
+     * @param eventProcessorClass event processor class with an accessible no-argument constructor
+     */
+    void setEventProcessorClass(Class<T> eventProcessorClass) {
+        this.eventProcessorClass = eventProcessorClass;
+    }
+
+    /**
+     * Creates a new processor instance. The partition context is not used.
+     *
+     * @param context partition the processor is being created for (ignored here)
+     * @return a new instance of the configured class
+     * @throws Exception whatever instantiation throws, including a NullPointerException
+     *                   when no class has been set
+     */
+    @Override
+    public T createEventProcessor(PartitionContext context) throws Exception {
+        return this.eventProcessorClass.newInstance();
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHost.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHost.java
new file mode 100644
index 0000000000000..86724933e0c9e
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHost.java
@@ -0,0 +1,570 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.ConnectionStringBuilder;
+import com.microsoft.azure.eventhubs.RetryPolicy;
+import com.microsoft.azure.storage.StorageException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URISyntaxException;
+import java.security.InvalidKeyException;
+import java.util.UUID;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/***
+ * The main class of event processor host.
+ */
+public final class EventProcessorHost {
+ private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(EventProcessorHost.class);
+ private static final Object uuidSynchronizer = new Object();
+ // weOwnExecutor exists to support user-supplied thread pools.
+ private final boolean weOwnExecutor;
+ private final ScheduledExecutorService executorService;
+ private final int executorServicePoolSize = 16;
+ private final HostContext hostContext;
+ private boolean initializeLeaseManager = false;
+ private volatile CompletableFuture unregistered = null;
+ private PartitionManager partitionManager;
+ private PartitionManagerOptions partitionManagerOptions = null;
+
+ /**
+ * Create a new host instance to process events from an Event Hub.
+ *
+ * Since Event Hubs are generally used for scale-out, high-traffic scenarios, in most scenarios there will
+ * be only one host instance per process, and the processes will be run on separate machines. Besides scale, this also
+ * provides isolation: one process or machine crashing will not take out multiple host instances. However, it is
+ * supported to run multiple host instances on one machine, or even within one process, for development and testing.
+ *
+ * The hostName parameter is a name for this event processor host, which must be unique among all event processor host instances
+ * receiving from this event hub+consumer group combination: the unique name is used to distinguish which event processor host
+ * instance owns the lease for a given partition. An easy way to generate a unique hostName which also includes
+ * other information is to call EventProcessorHost.createHostName("mystring").
+ *
+ * This overload of the constructor uses the built-in lease and checkpoint managers. The
+ * Azure Storage account specified by the storageConnectionString parameter is used by the built-in
+ * managers to record leases and checkpoints, in the specified container.
+ *
+ * The Event Hub connection string may be conveniently constructed using the ConnectionStringBuilder class
+ * from the Java Event Hub client.
+ *
+ * @param hostName A name for this event processor host. See method notes.
+ * @param eventHubPath Specifies the Event Hub to receive events from.
+ * @param consumerGroupName The name of the consumer group to use when receiving from the Event Hub.
+ * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+ * @param storageConnectionString Connection string for the Azure Storage account to use for persisting leases and checkpoints.
+ * @param storageContainerName Azure Storage container name for use by built-in lease and checkpoint manager.
+ */
+ public EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ final String storageConnectionString,
+ final String storageContainerName) {
+ // No executor supplied. The cast on null selects the seven-argument overload that ends in a
+ // ScheduledExecutorService rather than the one that ends in a blob-prefix String.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString, storageConnectionString, storageContainerName, (ScheduledExecutorService) null);
+ }
+
+ /**
+ * Create a new host to process events from an Event Hub.
+ *
+ * This overload adds an argument to specify a user-provided thread pool. The number of partitions in the
+ * target event hub and the number of host instances should be considered when choosing the size of the thread pool:
+ * how many partitions is one instance expected to own under normal circumstances? One thread per partition should
+ * provide good performance, while being able to support more partitions adequately if a host instance fails and its
+ * partitions must be redistributed.
+ *
+ * @param hostName A name for this event processor host. See method notes.
+ * @param eventHubPath Specifies the Event Hub to receive events from.
+ * @param consumerGroupName The name of the consumer group to use when receiving from the Event Hub.
+ * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+ * @param storageConnectionString Connection string for the Azure Storage account to use for persisting leases and checkpoints.
+ * @param storageContainerName Azure Storage container name for use by built-in lease and checkpoint manager.
+ * @param executorService User-supplied thread executor, or null to use EventProcessorHost-internal executor.
+ */
+ public EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ final String storageConnectionString,
+ final String storageContainerName,
+ final ScheduledExecutorService executorService) {
+ // No blob prefix supplied: forward null in the storageBlobPrefix position and pass the executor through as given.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString, storageConnectionString, storageContainerName, (String) null, executorService);
+ }
+
+ /**
+ * Create a new host to process events from an Event Hub.
+ *
+ * This overload adds an argument to specify a prefix used by the built-in lease manager when naming blobs in Azure Storage.
+ *
+ * @param hostName A name for this event processor host. See method notes.
+ * @param eventHubPath Specifies the Event Hub to receive events from.
+ * @param consumerGroupName The name of the consumer group to use when receiving from the Event Hub.
+ * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+ * @param storageConnectionString Connection string for the Azure Storage account to use for persisting leases and checkpoints.
+ * @param storageContainerName Azure Storage container name for use by built-in lease and checkpoint manager.
+ * @param storageBlobPrefix Prefix used when naming blobs within the storage container.
+ */
+ public EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ final String storageConnectionString,
+ final String storageContainerName,
+ final String storageBlobPrefix) {
+ // No executor supplied: forward null in the executorService position and pass the blob prefix through as given.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString, storageConnectionString, storageContainerName, storageBlobPrefix,
+ (ScheduledExecutorService) null);
+ }
+
+ /**
+ * Create a new host to process events from an Event Hub.
+ *
+ * This overload allows the caller to specify both a user-supplied thread pool and
+ * a prefix used by the built-in lease manager when naming blobs in Azure Storage.
+ *
+ * @param hostName A name for this event processor host. See method notes.
+ * @param eventHubPath Specifies the Event Hub to receive events from.
+ * @param consumerGroupName The name of the consumer group to use when receiving from the Event Hub.
+ * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+ * @param storageConnectionString Connection string for the Azure Storage account to use for persisting leases and checkpoints.
+ * @param storageContainerName Azure Storage container name for use by built-in lease and checkpoint manager.
+ * @param storageBlobPrefix Prefix used when naming blobs within the storage container.
+ * @param executorService User-supplied thread executor, or null to use EventProcessorHost-internal executor.
+ */
+ public EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ final String storageConnectionString,
+ final String storageContainerName,
+ final String storageBlobPrefix,
+ final ScheduledExecutorService executorService) {
+ // Would like to check storageConnectionString and storageContainerName here but can't, because Java doesn't allow statements before
+ // calling another constructor. storageBlobPrefix is allowed to be null or empty, doesn't need checking.
+ // A single AzureStorageCheckpointLeaseManager is created here and handed to the private overload.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString,
+ new AzureStorageCheckpointLeaseManager(storageConnectionString, storageContainerName, storageBlobPrefix), executorService);
+ // The assignments below run after the delegated constructors have finished, so the Azure-specific
+ // options replace whatever PartitionManagerOptions the delegated constructor installed.
+ // NOTE(review): initializeLeaseManager presumably marks the built-in manager for initialization
+ // later on -- confirm where the flag is read.
+ this.initializeLeaseManager = true;
+ this.partitionManagerOptions = new AzureStoragePartitionManagerOptions();
+ }
+
+ // Because Java won't let you do ANYTHING before calling another constructor. In particular, you can't
+ // new up an object and pass it as TWO parameters of the other constructor.
+ private EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ final AzureStorageCheckpointLeaseManager combinedManager,
+ final ScheduledExecutorService executorService) {
+ // The one combined manager is passed as both the checkpoint manager and the lease manager;
+ // the trailing null is the retry policy argument.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString, combinedManager, combinedManager, executorService, null);
+ }
+
+ /**
+ * Create a new host to process events from an Event Hub.
+ *
+ * This overload allows the caller to provide their own lease and checkpoint managers to replace the built-in
+ * ones based on Azure Storage.
+ *
+ * @param hostName A name for this event processor host. See method notes.
+ * @param eventHubPath Specifies the Event Hub to receive events from.
+ * @param consumerGroupName The name of the consumer group to use when receiving from the Event Hub.
+ * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+ * @param checkpointManager Implementation of ICheckpointManager, to be replacement checkpoint manager.
+ * @param leaseManager Implementation of ILeaseManager, to be replacement lease manager.
+ */
+ public EventProcessorHost(
+ final String hostName,
+ final String eventHubPath,
+ final String consumerGroupName,
+ final String eventHubConnectionString,
+ ICheckpointManager checkpointManager,
+ ILeaseManager leaseManager) {
+ // The two nulls are the executor service and the retry policy arguments.
+ this(hostName, eventHubPath, consumerGroupName, eventHubConnectionString, checkpointManager, leaseManager, null, null);
+ }
+
+    /**
+     * Create a new host to process events from an Event Hub.
+     *
+     * This overload allows the caller to provide their own lease and checkpoint managers to replace the built-in
+     * ones based on Azure Storage, and to provide an executor service and a retry policy for communications with the event hub.
+     *
+     * @param hostName                 A name for this event processor host. See method notes.
+     * @param eventHubPath             Specifies the Event Hub to receive events from.
+     * @param consumerGroupName        The name of the consumer group to use when receiving from the Event Hub.
+     * @param eventHubConnectionString Connection string for the Event Hub to receive from.
+     * @param checkpointManager        Implementation of ICheckpointManager, to be replacement checkpoint manager.
+     * @param leaseManager             Implementation of ILeaseManager, to be replacement lease manager.
+     * @param executorService          User-supplied thread executor, or null to use EventProcessorHost-internal executor.
+     * @param retryPolicy              Retry policy governing communications with the event hub.
+     * @throws IllegalArgumentException if hostName, consumerGroupName, or eventHubConnectionString is null or empty;
+     *                                  if the Event Hub path is absent from both eventHubPath and the connection string,
+     *                                  or differs between them; or if checkpointManager or leaseManager is null.
+     */
+    public EventProcessorHost(
+            final String hostName,
+            final String eventHubPath,
+            final String consumerGroupName,
+            final String eventHubConnectionString,
+            ICheckpointManager checkpointManager,
+            ILeaseManager leaseManager,
+            ScheduledExecutorService executorService,
+            RetryPolicy retryPolicy) {
+        if ((hostName == null) || hostName.isEmpty()) {
+            throw new IllegalArgumentException("hostName argument must not be null or empty string");
+        }
+
+        // eventHubPath is allowed to be null or empty if it is provided in the connection string. That will be checked later.
+        if ((consumerGroupName == null) || consumerGroupName.isEmpty()) {
+            throw new IllegalArgumentException("consumerGroupName argument must not be null or empty");
+        }
+
+        if ((eventHubConnectionString == null) || eventHubConnectionString.isEmpty()) {
+            throw new IllegalArgumentException("eventHubConnectionString argument must not be null or empty");
+        }
+
+        // The event hub path must appear in at least one of the eventHubPath argument or the connection string.
+        // If it appears in both, then it must be the same in both. If it appears in only one, populate the other.
+        ConnectionStringBuilder providedCSB = new ConnectionStringBuilder(eventHubConnectionString);
+        String extractedEntityPath = providedCSB.getEventHubName();
+        String effectiveEventHubPath = eventHubPath;
+        String effectiveEventHubConnectionString = eventHubConnectionString;
+        if ((effectiveEventHubPath != null) && !effectiveEventHubPath.isEmpty()) {
+            if (extractedEntityPath != null) {
+                if (!effectiveEventHubPath.equals(extractedEntityPath)) {
+                    throw new IllegalArgumentException("Provided EventHub path in eventHubPath parameter conflicts with the path in provided EventHub connection string");
+                }
+                // else they are the same and that's fine
+            } else {
+                // There is no entity path in the connection string, so put it there.
+                // NOTE(review): only endpoint, SAS key name, SAS key, and operation timeout are carried over;
+                // confirm that no other connection string property needs to survive the rebuild.
+                ConnectionStringBuilder rebuildCSB = new ConnectionStringBuilder()
+                        .setEndpoint(providedCSB.getEndpoint())
+                        .setEventHubName(effectiveEventHubPath)
+                        .setSasKeyName(providedCSB.getSasKeyName())
+                        .setSasKey(providedCSB.getSasKey());
+                rebuildCSB.setOperationTimeout(providedCSB.getOperationTimeout());
+                effectiveEventHubConnectionString = rebuildCSB.toString();
+            }
+        } else {
+            if ((extractedEntityPath != null) && !extractedEntityPath.isEmpty()) {
+                effectiveEventHubPath = extractedEntityPath;
+            } else {
+                throw new IllegalArgumentException("Provide EventHub entity path in either eventHubPath argument or in eventHubConnectionString");
+            }
+        }
+
+        if (checkpointManager == null) {
+            throw new IllegalArgumentException("Must provide an object which implements ICheckpointManager");
+        }
+        if (leaseManager == null) {
+            throw new IllegalArgumentException("Must provide an object which implements ILeaseManager");
+        }
+        // executorService argument is allowed to be null, that is the indication to use an internal threadpool.
+
+        // Nothing assigns partitionManagerOptions before this point (its field initializer is null), so
+        // always establish the generic defaults here. The Azure Storage constructor replaces them with
+        // AzureStoragePartitionManagerOptions after this constructor returns.
+        this.partitionManagerOptions = new PartitionManagerOptions();
+
+        if (executorService != null) {
+            // User has supplied an ExecutorService, so use that.
+            this.weOwnExecutor = false;
+            this.executorService = executorService;
+        } else {
+            this.weOwnExecutor = true;
+            this.executorService = Executors.newScheduledThreadPool(
+                    this.executorServicePoolSize,
+                    new EventProcessorHostThreadPoolFactory(hostName, effectiveEventHubPath, consumerGroupName));
+        }
+
+        this.hostContext = new HostContext(this.executorService,
+                this, hostName,
+                effectiveEventHubPath, consumerGroupName, effectiveEventHubConnectionString, retryPolicy,
+                leaseManager, checkpointManager);
+
+        this.partitionManager = new PartitionManager(hostContext);
+
+        TRACE_LOGGER.info(this.hostContext.withHost("New EventProcessorHost created."));
+    }
+
+ /**
+  * Builds a host name that is unique, and therefore safe to pass to the EventProcessorHost
+  * constructors that take a hostName argument.
+  *
+  * The name is assembled from three parts: a prefix, a dash '-', and a newly generated UUID.
+  * Callers that pass null or an empty string get the stock prefix "javahost" instead.
+  *
+  * @param prefix Text the generated name should start with; null or empty selects "javahost".
+  * @return The chosen prefix, followed by a dash and a UUID string.
+  */
+ public static String createHostName(String prefix) {
+     // Decide once whether the caller gave us anything usable.
+     final boolean prefixMissing = (prefix == null) || prefix.isEmpty();
+     final String leading = prefixMissing ? "javahost" : prefix;
+
+     // The UUID suffix is what makes the name unique.
+     return leading + "-" + safeCreateUUID();
+ }
+
+ /**
+  * Generates a UUID string while holding a class-wide lock, so only one thread at a time
+  * is inside UUID.randomUUID().
+  *
+  * The lock exists because null and empty strings were observed coming back from
+  * UUID.randomUUID().toString() under multi-threaded use, and its thread-safety could not be confirmed.
+  * The method is public so that user-written lease and checkpoint managers, which also need
+  * UUIDs, can share this serialized generator.
+  *
+  * @return A string UUID with dashes but no curly brackets.
+  */
+ public static String safeCreateUUID() {
+     final String generated;
+     synchronized (EventProcessorHost.uuidSynchronizer) {
+         generated = UUID.randomUUID().toString();
+     }
+     return generated;
+ }
+
+ /**
+ * Returns the processor host name, as held by this host's HostContext.
+ * The name is supplied by the user at constructor time; exposing it here means callers holding the
+ * host object (for logging, for example) do not have to carry the name around separately.
+ *
+ * @return The processor host name
+ */
+ public String getHostName() {
+ return this.hostContext.getHostName();
+ }
+
+ // TEST USE ONLY: swaps in a different PartitionManager than the one the constructor created.
+ void setPartitionManager(PartitionManager pm) {
+ this.partitionManager = pm;
+ }
+
+ HostContext getHostContext() { // package-private accessor for the HostContext built in the constructor
+ return this.hostContext;
+ }
+
+ /**
+ * Returns the existing partition manager options object. Unless you are providing your own implementations
+ * of ILeaseManager and ICheckpointManager, the way to change partition manager options is to call this
+ * method and then call setters on the returned object to adjust the values.
+ *
+ * @return the internally-created PartitionManagerOptions object or any replacement object set with setPartitionManagerOptions
+ */
+ public PartitionManagerOptions getPartitionManagerOptions() {
+ return this.partitionManagerOptions;
+ }
+
+ /**
+ * Replaces the partition manager options object all at once. Normally this method is used only when providing
+ * user implementations of ILeaseManager and ICheckpointManager, because it allows passing an object of a class
+ * derived from PartitionManagerOptions, which could contain options specific to the user-implemented ILeaseManager
+ * or ICheckpointManager. When using the default, Azure Storage-based implementation, the recommendation is to
+ * call getPartitionManagerOptions to return the existing options object, then call setters on that object to
+ * adjust the values.
+ *
+ * @param options - a PartitionManagerOptions object (or derived object) representing the desired options
+ */
+ public void setPartitionManagerOptions(PartitionManagerOptions options) {
+ this.partitionManagerOptions = options;
+ }
+
+ /**
+ * Register class for event processor and start processing.
+ *
+ * This overload wraps the class in a DefaultEventProcessorFactory, which simply creates new instances of
+ * the registered event processor class, and passes EventProcessorOptions.getDefaultOptions() as the options.
+ *
+ * The returned CompletableFuture completes when host initialization is finished. Initialization failures are
+ * reported by completing the future with an exception, so it is important to call get() on the future and handle
+ * any exceptions thrown.
+ *
+ * class MyEventProcessor implements IEventProcessor { ... }
+ * EventProcessorHost host = new EventProcessorHost(...);
+ * {@literal CompletableFuture} foo = host.registerEventProcessor(MyEventProcessor.class);
+ * foo.get();
+ *
+ *
+ * @param Not actually a parameter. Represents the type of your class that implements IEventProcessor.
+ * @param eventProcessorType Class that implements IEventProcessor.
+ * @return Future that completes when initialization is finished.
+ */
+ public CompletableFuture registerEventProcessor(Class eventProcessorType) {
+ DefaultEventProcessorFactory defaultFactory = new DefaultEventProcessorFactory();
+ defaultFactory.setEventProcessorClass(eventProcessorType);
+ return registerEventProcessorFactory(defaultFactory, EventProcessorOptions.getDefaultOptions());
+ }
+
+ /**
+ * Register class for event processor and start processing.
+ *
+ * This overload wraps the class in a DefaultEventProcessorFactory, which simply creates new instances of
+ * the registered event processor class, and forwards the user-specified options unchanged.
+ *
+ * The returned CompletableFuture completes when host initialization is finished. Initialization failures are
+ * reported by completing the future with an exception, so it is important to call get() on the future and handle
+ * any exceptions thrown.
+ *
+ * @param Not actually a parameter. Represents the type of your class that implements IEventProcessor.
+ * @param eventProcessorType Class that implements IEventProcessor.
+ * @param processorOptions Options for the processor host and event processor(s).
+ * @return Future that completes when initialization is finished.
+ */
+ public CompletableFuture registerEventProcessor(Class eventProcessorType, EventProcessorOptions processorOptions) {
+ DefaultEventProcessorFactory defaultFactory = new DefaultEventProcessorFactory();
+ defaultFactory.setEventProcessorClass(eventProcessorType);
+ return registerEventProcessorFactory(defaultFactory, processorOptions);
+ }
+
+ /**
+  * Register a user-supplied event processor factory and start processing.
+  *
+  * If creating a new event processor requires more work than just new'ing an object, the user must
+  * create an object that implements IEventProcessorFactory and pass it to this method, instead of calling
+  * registerEventProcessor.
+  *
+  * This overload uses default options for the processor host and event processor(s).
+  *
+  * The returned CompletableFuture completes when host initialization is finished. Initialization failures are
+  * reported by completing the future with an exception, so it is important to call get() on the future and handle
+  * any exceptions thrown.
+  *
+  * @param factory User-supplied event processor factory object.
+  * @return Future that completes when initialization is finished.
+  */
+ public CompletableFuture registerEventProcessorFactory(IEventProcessorFactory<?> factory) {
+     return registerEventProcessorFactory(factory, EventProcessorOptions.getDefaultOptions());
+ }
+
+ /**
+  * Register user-supplied event processor factory with user-specified options and start processing.
+  *
+  * The returned CompletableFuture completes when host initialization is finished; initialization failures
+  * complete it with an exception, so it is important to call get() on it and handle any exception thrown.
+  *
+  * @param factory User-supplied event processor factory object.
+  * @param processorOptions Options for the processor host and event processor(s).
+  * @return Future that completes when initialization is finished.
+  * @throws IllegalStateException if called after unregisterEventProcessor, or if a factory is already registered.
+  * @throws RejectedExecutionException if the executor service has already been shut down.
+  * @throws RuntimeException if initializing the Storage lease manager fails.
+  */
+ public CompletableFuture registerEventProcessorFactory(IEventProcessorFactory<?> factory, EventProcessorOptions processorOptions) {
+     if (this.unregistered != null) {
+         throw new IllegalStateException("Register cannot be called on an EventProcessorHost after unregister. Please create a new EventProcessorHost instance.");
+     }
+     if (this.hostContext.getEventProcessorFactory() != null) {
+         throw new IllegalStateException("Register has already been called on this EventProcessorHost");
+     }
+
+     this.hostContext.setEventProcessorFactory(factory);
+     this.hostContext.setEventProcessorOptions(processorOptions);
+
+     if (this.executorService.isShutdown() || this.executorService.isTerminated()) {
+         TRACE_LOGGER.warn(this.hostContext.withHost("Calling registerEventProcessor/Factory after executor service has been shut down."));
+         throw new RejectedExecutionException("EventProcessorHost executor service has been shut down");
+     }
+
+     if (this.initializeLeaseManager) {
+         try {
+             ((AzureStorageCheckpointLeaseManager) this.hostContext.getLeaseManager()).initialize(this.hostContext);
+         } catch (InvalidKeyException | URISyntaxException | StorageException e) {
+             TRACE_LOGGER.error(this.hostContext.withHost("Failure initializing default lease and checkpoint manager."));
+             throw new RuntimeException("Failure initializing Storage lease manager", e);
+         }
+     }
+
+     TRACE_LOGGER.info(this.hostContext.withHost("Starting event processing."));
+
+     return this.partitionManager.initialize();
+ }
+
+ /**
+  * Stop processing events and shut down this host instance. Repeated calls return the same future.
+  *
+  * @return A CompletableFuture that completes when shutdown is finished.
+  */
+ public CompletableFuture unregisterEventProcessor() {
+     TRACE_LOGGER.info(this.hostContext.withHost("Stopping event processing"));
+
+     if (this.unregistered == null) {
+         // PartitionManager is created in constructor. If this object exists, then
+         // this.partitionManager is not null.
+         this.unregistered = this.partitionManager.stopPartitions();
+
+         // If we own the executor, stop it also.
+         // Owned executor is also created in constructor.
+         if (this.weOwnExecutor) {
+             this.unregistered = this.unregistered.thenRunAsync(() ->
+             {
+                 // IMPORTANT: run this last stage in the default threadpool!
+                 // If a task running in a threadpool waits for that threadpool to terminate, it's going to wait a long time...
+                 // shutdown() only stops the executor from accepting new tasks; tasks already submitted
+                 // (including everything the shutdown itself generated) still run to completion.
+                 this.executorService.shutdown();
+                 try {
+                     if (!this.executorService.awaitTermination(10, TimeUnit.MINUTES)) {
+                         TRACE_LOGGER.warn(this.hostContext.withHost("Executor service did not terminate within 10 minutes of shutdown."));
+                     }
+                 } catch (InterruptedException e) {
+                     // Restore the interrupt flag so code further up this thread can still observe the interruption.
+                     Thread.currentThread().interrupt();
+                     throw new CompletionException(e);
+                 }
+             }, ForkJoinPool.commonPool());
+         }
+     }
+
+     return this.unregistered;
+ }
+
+ static class EventProcessorHostThreadPoolFactory implements ThreadFactory {
+     private static final AtomicInteger poolNumber = new AtomicInteger(1);
+     private final AtomicInteger threadNumber = new AtomicInteger(1);
+     private final ThreadGroup group;
+     private final String namePrefix;
+     private final String hostName;
+     private final String entityName;
+     private final String consumerGroupName;
+
+     public EventProcessorHostThreadPoolFactory(String hostName, String entityName, String consumerGroupName) {
+         this.hostName = hostName;
+         this.entityName = entityName;
+         this.consumerGroupName = consumerGroupName;
+         this.namePrefix = this.getNamePrefix();
+         // Prefer the security manager's thread group when one is installed.
+         final SecurityManager securityManager = System.getSecurityManager();
+         final boolean managed = securityManager != null;
+         this.group = managed ? securityManager.getThreadGroup() : Thread.currentThread().getThreadGroup();
+     }
+
+     @Override
+     public Thread newThread(Runnable r) {
+         final String threadName = this.namePrefix + this.threadNumber.getAndIncrement();
+         final Thread worker = new Thread(this.group, r, threadName, 0);
+         worker.setDaemon(false);
+         worker.setPriority(Thread.NORM_PRIORITY);
+         worker.setUncaughtExceptionHandler(new ThreadUncaughtExceptionHandler());
+         return worker;
+     }
+
+     private String getNamePrefix() {
+         return "[" + this.entityName + "|" + this.consumerGroupName + "|" + this.hostName + "]-" + poolNumber.getAndIncrement() + "-";
+     }
+
+     static class ThreadUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {
+         @Override
+         public void uncaughtException(Thread t, Throwable e) {
+             TRACE_LOGGER.warn("Uncaught exception occurred. Thread " + t.getName(), e);
+         }
+     }
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHostActionStrings.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHostActionStrings.java
new file mode 100644
index 0000000000000..7d1335d5dfe4e
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorHostActionStrings.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/***
+ * The action string of ExceptionReceivedEventArts will be one of this.
+ * They describe what activity was taking place when the exception occurred.
+ */
+public final class EventProcessorHostActionStrings {
+ public final static String ACQUIRING_LEASE = "Acquiring Lease";
+ public final static String CHECKING_CHECKPOINT_STORE = "Checking Checpoint Store Existence";
+ public final static String CHECKING_LEASES = "Checking Leases";
+ public final static String CHECKING_LEASE_STORE = "Checking Lease Store Existence";
+ public final static String CLOSING_EVENT_PROCESSOR = "Closing Event Processor";
+ public final static String CREATING_CHECKPOINTS = "Creating Checkpoint Holders";
+ public final static String CREATING_CHECKPOINT_STORE = "Creating Checkpoint Store";
+ public final static String CREATING_EVENT_HUB_CLIENT = "Creating Event Hub Client";
+ public final static String CREATING_EVENT_PROCESSOR = "Creating Event Processor";
+ public final static String CREATING_LEASES = "Creating Leases";
+ public final static String CREATING_LEASE_STORE = "Creating Lease Store";
+ public final static String DELETING_LEASE = "Deleting Lease";
+ public final static String GETTING_CHECKPOINT = "Getting Checkpoint Details";
+ public final static String GETTING_LEASE = "Getting Lease Details";
+ public final static String INITIALIZING_STORES = "Initializing Stores";
+ public final static String OPENING_EVENT_PROCESSOR = "Opening Event Processor";
+ public final static String PARTITION_MANAGER_CLEANUP = "Partition Manager Cleanup";
+ public final static String PARTITION_MANAGER_MAIN_LOOP = "Partition Manager Main Loop";
+ public final static String RELEASING_LEASE = "Releasing Lease";
+ public final static String RENEWING_LEASE = "Renewing Lease";
+ public final static String STEALING_LEASE = "Stealing Lease";
+ public final static String UPDATING_CHECKPOINT = "Updating Checkpoint";
+ public final static String UPDATING_LEASE = "Updating Lease";
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorOptions.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorOptions.java
new file mode 100644
index 0000000000000..e5d5ac8cce5e7
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/EventProcessorOptions.java
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.EventPosition;
+import com.microsoft.azure.eventhubs.PartitionReceiver;
+
+import java.time.Duration;
+import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+/***
+ * Options affecting the behavior of the event processor host instance in general.
+ */
+public final class EventProcessorOptions {
+ private Consumer exceptionNotificationHandler = null;
+ private Boolean invokeProcessorAfterReceiveTimeout = false;
+ private boolean receiverRuntimeMetricEnabled = false;
+ private int maxBatchSize = 10;
+ private int prefetchCount = 300;
+ private Duration receiveTimeOut = Duration.ofMinutes(1);
+ private Function initialPositionProvider = (partitionId) -> {
+ return EventPosition.fromStartOfStream();
+ };
+
+ public EventProcessorOptions() {
+ }
+
+ /***
+ * Returns an EventProcessorOptions instance with all options set to the default values.
+ *
+ * The default values are:
+ *
+ * MaxBatchSize: 10
+ * ReceiveTimeOut: 1 minute
+ * PrefetchCount: 300
+ * InitialPositionProvider: uses the last checkpoint, or START_OF_STREAM
+ * InvokeProcessorAfterReceiveTimeout: false
+ * ReceiverRuntimeMetricEnabled: false
+ *
+ *
+ * @return an EventProcessorOptions instance with all options set to the default values
+ */
+ public static EventProcessorOptions getDefaultOptions() {
+ return new EventProcessorOptions();
+ }
+
+ /**
+ * Sets a handler which receives notification of general exceptions.
+ *
+ * Exceptions which occur while processing events from a particular Event Hub partition are delivered
+ * to the onError method of the event processor for that partition. This handler is called on occasions
+ * when there is no event processor associated with the throwing activity, or the event processor could
+ * not be created.
+ *
+ * The handler is not expected to do anything about the exception. If it is possible to recover, the
+ * event processor host instance will recover automatically.
+ *
+ * @param notificationHandler Handler which is called when an exception occurs. Set to null to stop handling.
+ */
+ public void setExceptionNotification(Consumer notificationHandler) {
+ this.exceptionNotificationHandler = notificationHandler;
+ }
+
+ /**
+ * Returns the maximum number of events that will be passed to one call to IEventProcessor.onEvents
+ *
+ * @return the maximum maximum number of events that will be passed to one call to IEventProcessor.onEvents
+ */
+ public int getMaxBatchSize() {
+ return this.maxBatchSize;
+ }
+
+ /**
+ * Sets the maximum number of events that will be passed to one call to IEventProcessor.onEvents
+ *
+ * @param maxBatchSize the maximum number of events that will be passed to one call to IEventProcessor.onEvents
+ */
+ public void setMaxBatchSize(int maxBatchSize) {
+ this.maxBatchSize = maxBatchSize;
+ }
+
+ /**
+ * Returns the timeout for receive operations.
+ *
+ * @return the timeout for receive operations
+ */
+ public Duration getReceiveTimeOut() {
+ return this.receiveTimeOut;
+ }
+
+ /**
+ * Sets the timeout for receive operations.
+ *
+ * @param receiveTimeOut new timeout for receive operations
+ */
+ public void setReceiveTimeOut(Duration receiveTimeOut) {
+ this.receiveTimeOut = receiveTimeOut;
+ }
+
+ /***
+ * Returns the current prefetch count for the underlying event hub client.
+ *
+ * @return the current prefetch count for the underlying client
+ */
+ public int getPrefetchCount() {
+ return this.prefetchCount;
+ }
+
+ /***
+ * Sets the prefetch count for the underlying event hub client.
+ *
+ * The default is 500. This controls how many events are received in advance.
+ *
+ * @param prefetchCount The new prefetch count.
+ */
+ public void setPrefetchCount(int prefetchCount) {
+ if (prefetchCount < PartitionReceiver.MINIMUM_PREFETCH_COUNT) {
+ throw new IllegalArgumentException(String.format(Locale.US,
+ "PrefetchCount has to be above %s", PartitionReceiver.MINIMUM_PREFETCH_COUNT));
+ }
+
+ if (prefetchCount > PartitionReceiver.MAXIMUM_PREFETCH_COUNT) {
+ throw new IllegalArgumentException(String.format(Locale.US,
+ "PrefetchCount has to be below %s", PartitionReceiver.MAXIMUM_PREFETCH_COUNT));
+ }
+
+ this.prefetchCount = prefetchCount;
+ }
+
+ /***
+ * If there is no checkpoint for a partition, the initialPositionProvider function is used to determine
+ * the position at which to start receiving events for that partition.
+ *
+ * @return the current initial position provider function
+ */
+ public Function getInitialPositionProvider() {
+ return this.initialPositionProvider;
+ }
+
+ /***
+ * Sets the function used to determine the position at which to start receiving events for a
+ * partition if there is no checkpoint for that partition.
+ *
+ * The provider function takes one argument, the partition id (a String), and returns the desired position.
+ *
+ * @param initialPositionProvider The new provider function.
+ */
+ public void setInitialPositionProvider(Function initialPositionProvider) {
+ this.initialPositionProvider = initialPositionProvider;
+ }
+
+ /***
+ * Returns whether the EventProcessorHost will call IEventProcessor.onEvents() with an empty iterable
+ * when a receive timeout occurs (true) or not (false).
+ *
+ * Defaults to false.
+ *
+ * @return true if EventProcessorHost will call IEventProcessor.OnEvents on receive timeout, false otherwise
+ */
+ public Boolean getInvokeProcessorAfterReceiveTimeout() {
+ return this.invokeProcessorAfterReceiveTimeout;
+ }
+
+ /**
+ * Changes whether the EventProcessorHost will call IEventProcessor.onEvents() with an empty iterable
+ * when a receive timeout occurs (true) or not (false).
+ *
+ * The default is false (no call).
+ *
+ * @param invokeProcessorAfterReceiveTimeout the new value for what to do
+ */
+ public void setInvokeProcessorAfterReceiveTimeout(Boolean invokeProcessorAfterReceiveTimeout) {
+ this.invokeProcessorAfterReceiveTimeout = invokeProcessorAfterReceiveTimeout;
+ }
+
+ /**
+ * Knob to enable/disable runtime metric of the receiver. If this is set to true,
+ * the first parameter {@link com.microsoft.azure.eventprocessorhost.PartitionContext#runtimeInformation} of
+ * {@link IEventProcessor#onEvents(com.microsoft.azure.eventprocessorhost.PartitionContext, java.lang.Iterable)} will be populated.
+ *
+ * Enabling this knob will add 3 additional properties to all raw AMQP events received.
+ *
+ * @return the {@link boolean} indicating, whether, the runtime metric of the receiver was enabled
+ */
+ public boolean getReceiverRuntimeMetricEnabled() {
+ return this.receiverRuntimeMetricEnabled;
+ }
+
+ /**
+ * Knob to enable/disable runtime metric of the receiver. If this is set to true,
+ * the first parameter {@link com.microsoft.azure.eventprocessorhost.PartitionContext#runtimeInformation} of
+ * {@link IEventProcessor#onEvents(com.microsoft.azure.eventprocessorhost.PartitionContext, java.lang.Iterable)} will be populated.
+ *
+ * Enabling this knob will add 3 additional properties to all raw AMQP events received.
+ *
+ * @param value the {@link boolean} to indicate, whether, the runtime metric of the receiver should be enabled
+ */
+ public void setReceiverRuntimeMetricEnabled(boolean value) {
+ this.receiverRuntimeMetricEnabled = value;
+ }
+
+ void notifyOfException(String hostname, Exception exception, String action) {
+ notifyOfException(hostname, exception, action, ExceptionReceivedEventArgs.NO_ASSOCIATED_PARTITION);
+ }
+
+ void notifyOfException(String hostname, Exception exception, String action, String partitionId) {
+ // Capture handler so it doesn't get set to null between test and use
+ Consumer handler = this.exceptionNotificationHandler;
+ if (handler != null) {
+ handler.accept(new ExceptionReceivedEventArgs(hostname, exception, action, partitionId));
+ }
+ }
+
+ /***
+ * A prefab initial position provider that starts from the first event available.
+ *
+ * How to use this initial position provider: setInitialPositionProvider(new EventProcessorOptions.StartOfStreamInitialPositionProvider());
+ */
+ public class StartOfStreamInitialPositionProvider implements Function {
+ @Override
+ public EventPosition apply(String t) {
+ return EventPosition.fromStartOfStream();
+ }
+ }
+
+ /***
+ * A prefab initial position provider that starts from the next event that becomes available.
+ *
+ * How to use this initial position provider: setInitialPositionProvider(new EventProcessorOptions.EndOfStreamInitialPositionProvider());
+ */
+ public class EndOfStreamInitialPositionProvider implements Function {
+ @Override
+ public EventPosition apply(String t) {
+ return EventPosition.fromEndOfStream();
+ }
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionReceivedEventArgs.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionReceivedEventArgs.java
new file mode 100644
index 0000000000000..78eb7311864a0
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionReceivedEventArgs.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * Passed as an argument to the general exception handler that can be set via EventProcessorOptions.
+ */
+public final class ExceptionReceivedEventArgs {
+ public static final String NO_ASSOCIATED_PARTITION = "N/A";
+ private final String hostname;
+ private final Exception exception;
+ private final String action;
+ private final String partitionId;
+
+ ExceptionReceivedEventArgs(String hostname, Exception exception, String action) {
+ this(hostname, exception, action, ExceptionReceivedEventArgs.NO_ASSOCIATED_PARTITION);
+ }
+
+ ExceptionReceivedEventArgs(String hostname, Exception exception, String action, String partitionId) {
+ this.hostname = hostname;
+ this.exception = exception;
+ this.action = action;
+ if ((partitionId == null) || partitionId.isEmpty()) {
+ throw new IllegalArgumentException("PartitionId must not be null or empty");
+ }
+ this.partitionId = partitionId;
+ }
+
+ /**
+ * Allows distinguishing the error source if multiple hosts in a single process.
+ *
+ * @return The name of the host that experienced the exception.
+ */
+ public String getHostname() {
+ return this.hostname;
+ }
+
+ /**
+ * Returns the exception that was thrown.
+ *
+ * @return The exception.
+ */
+ public Exception getException() {
+ return this.exception;
+ }
+
+ /**
+ * See EventProcessorHostActionString for a list of possible values.
+ *
+ * @return A short string that indicates what general activity threw the exception.
+ */
+ public String getAction() {
+ return this.action;
+ }
+
+ /**
+ * If the error is associated with a particular partition (for example, failed to open the event processor
+ * for the partition), the id of the partition. Otherwise, NO_ASSOCIATED_PARTITION.
+ *
+ * @return A partition id.
+ */
+ public String getPartitionId() {
+ return this.partitionId;
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionWithAction.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionWithAction.java
new file mode 100644
index 0000000000000..22b8e406f0ef2
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ExceptionWithAction.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+ // Pairs a wrapped cause with an action string. Never thrown into user code, so it can be package private.
+ class ExceptionWithAction extends Exception {
+ private static final long serialVersionUID = 7480590197418857145L;
+ // Action recorded at construction; presumably one of the EventProcessorHostActionStrings values -- confirm.
+ private final String action;
+ // Wraps e as the cause; no explicit message is supplied.
+ ExceptionWithAction(Throwable e, String action) {
+ super(e);
+ this.action = action;
+ }
+ // Wraps e as the cause and uses the caller-supplied message.
+ ExceptionWithAction(Throwable e, String message, String action) {
+ super(message, e);
+ this.action = action;
+ }
+ // Returns the action string given at construction.
+ String getAction() {
+ return this.action;
+ }
+ }
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/HostContext.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/HostContext.java
new file mode 100644
index 0000000000000..d18a18317f771
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/HostContext.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.RetryPolicy;
+
+import java.util.concurrent.ScheduledExecutorService;
+
+final class HostContext {
+    private final ScheduledExecutorService executor;
+
+    // Ideally we wouldn't need the host, but there are certain things which can be dynamically changed
+    // by the user via APIs on the host and which need to be exposed on the HostContext. Passing the
+    // call through is easier and safer than trying to keep two copies in sync.
+    private final EventProcessorHost host;
+    private final String hostName;
+
+    private final String eventHubPath;
+    private final String consumerGroupName;
+    private final String eventHubConnectionString;
+    private final RetryPolicy retryPolicy;
+
+    private final ILeaseManager leaseManager;
+    private final ICheckpointManager checkpointManager;
+
+    // Cannot be final because it is not available at HostContext construction time.
+    private EventProcessorOptions eventProcessorOptions = null;
+
+    // Cannot be final because it is not available at HostContext construction time.
+    private IEventProcessorFactory<?> processorFactory = null;
+
+
+    HostContext(ScheduledExecutorService executor,
+            EventProcessorHost host, String hostName,
+            String eventHubPath, String consumerGroupName, String eventHubConnectionString, RetryPolicy retryPolicy,
+            ILeaseManager leaseManager, ICheckpointManager checkpointManager) {
+        this.executor = executor;
+
+        this.host = host;
+        this.hostName = hostName;
+
+        this.eventHubPath = eventHubPath;
+        this.consumerGroupName = consumerGroupName;
+        this.eventHubConnectionString = eventHubConnectionString;
+        this.retryPolicy = retryPolicy;
+
+        this.leaseManager = leaseManager;
+        this.checkpointManager = checkpointManager;
+    }
+
+    ScheduledExecutorService getExecutor() {
+        return this.executor;
+    }
+
+    String getHostName() {
+        return this.hostName;
+    }
+
+    String getEventHubPath() {
+        return this.eventHubPath;
+    }
+
+    String getConsumerGroupName() {
+        return this.consumerGroupName;
+    }
+
+    String getEventHubConnectionString() {
+        return this.eventHubConnectionString;
+    }
+
+    RetryPolicy getRetryPolicy() {
+        return this.retryPolicy;
+    }
+
+    ILeaseManager getLeaseManager() {
+        return this.leaseManager;
+    }
+
+    ICheckpointManager getCheckpointManager() {
+        return this.checkpointManager;
+    }
+
+    PartitionManagerOptions getPartitionManagerOptions() {
+        return this.host.getPartitionManagerOptions();
+    }
+
+    // May be null if called too early! Not set until register time.
+    // In particular, store initialization happens before this is set.
+    EventProcessorOptions getEventProcessorOptions() {
+        return this.eventProcessorOptions;
+    }
+
+    void setEventProcessorOptions(EventProcessorOptions epo) {
+        this.eventProcessorOptions = epo;
+    }
+
+    // May be null if called too early! Not set until register time.
+    // In particular, store initialization happens before this is set.
+    IEventProcessorFactory<?> getEventProcessorFactory() {
+        return this.processorFactory;
+    }
+
+    void setEventProcessorFactory(IEventProcessorFactory<?> pf) {
+        this.processorFactory = pf;
+    }
+
+    //
+    // Logging utility functions. They are here rather than on LoggingUtils because they
+    // make use of this.hostName. Each one prefixes the message with "host <hostName>: ".
+    //
+
+    String withHost(String logMessage) {
+        return "host " + this.hostName + ": " + logMessage;
+    }
+
+    String withHostAndPartition(String partitionId, String logMessage) {
+        return withHost(partitionId + ": " + logMessage);
+    }
+
+    String withHostAndPartition(PartitionContext context, String logMessage) {
+        return withHostAndPartition(context.getPartitionId(), logMessage);
+    }
+
+    String withHostAndPartition(BaseLease lease, String logMessage) {
+        return withHostAndPartition(lease.getPartitionId(), logMessage);
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ICheckpointManager.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ICheckpointManager.java
new file mode 100644
index 0000000000000..687d547901085
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ICheckpointManager.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+
+/***
+ * If you wish to have EventProcessorHost store checkpoints somewhere other than Azure Storage,
+ * you can write your own checkpoint manager using this interface.
+ *
+ * The Azure Storage managers use the same storage for both lease and checkpoints, so both
+ * interfaces are implemented by the same class. You are free to do the same thing if you have
+ * a unified store for both types of data.
+ *
+ * This interface does not specify initialization methods because we have no way of knowing what
+ * information your implementation will require. If your implementation needs initialization, you
+ * will have to initialize the instance before passing it to the EventProcessorHost constructor.
+ */
+public interface ICheckpointManager {
+    /***
+     * Does the checkpoint store exist?
+     *
+     * The returned CompletableFuture completes with true if the checkpoint store exists or false if it
+     * does not. It completes exceptionally on error.
+     *
+     * @return CompletableFuture {@literal ->} true if it exists, false if not
+     */
+    public CompletableFuture<Boolean> checkpointStoreExists();
+
+    /***
+     * Create the checkpoint store if it doesn't exist. Do nothing if it does exist.
+     *
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> createCheckpointStoreIfNotExists();
+
+    /**
+     * Deletes the checkpoint store.
+     *
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> deleteCheckpointStore();
+
+    /***
+     * Get the checkpoint data associated with the given partition. Could return null if no checkpoint has
+     * been created for that partition.
+     *
+     * @param partitionId Id of partition to get checkpoint info for.
+     *
+     * @return CompletableFuture {@literal ->} checkpoint info, or null. Completes exceptionally on error.
+     */
+    public CompletableFuture<Checkpoint> getCheckpoint(String partitionId);
+
+    /***
+     * Creates the checkpoint HOLDERs for the given partitions. Does nothing for any checkpoint HOLDERs
+     * that already exist.
+     *
+     * The semantics of this are complicated because it is possible to use the same store for both
+     * leases and checkpoints (the Azure Storage implementation does so) and it is required to
+     * have a lease for every partition but it is not required to have a checkpoint for a partition.
+     * It is a valid scenario to never use checkpoints at all, so it is important for the store to
+     * distinguish between creating the structure(s) that will hold a checkpoint and actually creating
+     * a checkpoint (storing an offset/sequence number pair in the structure).
+     *
+     * @param partitionIds List of partitions to create checkpoint HOLDERs for.
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> createAllCheckpointsIfNotExists(List<String> partitionIds);
+
+    /***
+     * Update the checkpoint in the store with the offset/sequenceNumber in the provided checkpoint.
+     *
+     * The lease argument is necessary to make the Azure Storage implementation work correctly: the
+     * Azure Storage implementation stores the checkpoint as part of the lease and we cannot completely
+     * hide the connection between the two. If your implementation does not have this limitation, you are
+     * free to ignore the lease argument.
+     *
+     * @param lease lease for the partition to be checkpointed.
+     * @param checkpoint offset/sequenceNumber and partition id to update the store with.
+     * @return CompletableFuture {@literal ->} null on success. Completes exceptionally on error.
+     */
+    public CompletableFuture<Void> updateCheckpoint(CompleteLease lease, Checkpoint checkpoint);
+
+    /***
+     * Delete the stored checkpoint data for the given partition. If there is no stored checkpoint for the
+     * given partition, that is treated as success. Deleting the checkpoint HOLDER is allowed but not required;
+     * your implementation is free to do whichever is more convenient.
+     *
+     * @param partitionId id of partition to delete checkpoint from store
+     * @return CompletableFuture {@literal ->} null on success. Completes exceptionally on error.
+     */
+    public CompletableFuture<Void> deleteCheckpoint(String partitionId);
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessor.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessor.java
new file mode 100644
index 0000000000000..4d52f262c6d36
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessor.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.EventData;
+
+
+/**
+ * Interface that must be implemented by event processor classes.
+ *
+ * Any given instance of an event processor class will only process events from one partition
+ * of one Event Hub. A PartitionContext is provided with each call to the event processor because
+ * some parameters could change, but it will always be the same partition.
+ *
+ * Although EventProcessorHost is multithreaded, calls to a given instance of an event processor
+ * class are serialized, except for onError(). onOpen() is called first, then onEvents() will be called zero or more
+ * times. When the event processor needs to be shut down, whether because there was a failure
+ * somewhere, or the lease for the partition has been lost, or because the entire processor host
+ * is being shut down, onClose() is called after the last onEvents() call returns.
+ *
+ * onError() could be called while onEvents() or onClose() is executing. No synchronization is attempted
+ * in order to avoid possibly deadlocking.
+ */
+public interface IEventProcessor {
+    /**
+     * Called by processor host to initialize the event processor.
+     *
+     * If onOpen fails, this event processor host instance will give up ownership of the partition.
+     *
+     * @param context Information about the partition that this event processor will process events from.
+     * @throws Exception to indicate failure.
+     */
+    public void onOpen(PartitionContext context) throws Exception;
+
+    /**
+     * Called by processor host to indicate that the event processor is being stopped.
+     *
+     * If onClose fails, the exception is reported to the general exception notification handler set via
+     * EventProcessorOptions, if any, but is otherwise ignored.
+     *
+     * @param context Information about the partition.
+     * @param reason Reason why the event processor is being stopped.
+     * @throws Exception to indicate failure.
+     */
+    public void onClose(PartitionContext context, CloseReason reason) throws Exception;
+
+    /**
+     * Called by the processor host when a batch of events has arrived.
+     *
+     * This is where the real work of the event processor is done. It is normally called when one
+     * or more events have arrived. If the EventProcessorHost instance was set up with an EventProcessorOptions
+     * on which setInvokeProcessorAfterReceiveTimeout(true) has been called, then when a receive times out,
+     * onEvents will be called with an empty iterable. By default this option is false and receive timeouts do not
+     * cause a call to this method.
+     *
+     * @param context Information about the partition.
+     * @param events The events to be processed. May be empty.
+     * @throws Exception to indicate failure.
+     */
+    public void onEvents(PartitionContext context, Iterable<EventData> events) throws Exception;
+
+    /**
+     * Called when the underlying client experiences an error while receiving. EventProcessorHost will take
+     * care of recovering from the error and continuing to pump events, so no action is required from
+     * your code. This method is provided for informational purposes.
+     *
+     * @param context Information about the partition.
+     * @param error The error that occurred.
+     */
+    public void onError(PartitionContext context, Throwable error);
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessorFactory.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessorFactory.java
new file mode 100644
index 0000000000000..2a5204a7f6dd0
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/IEventProcessorFactory.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+
+/**
+ * Interface that must be implemented by an event processor factory class.
+ *
+ * User-provided factories are needed if creating an event processor object requires more work than
+ * just calling new with a parameterless constructor.
+ *
+ * @param <T> The type of event processor objects produced by this factory, which must implement IEventProcessor
+ */
+public interface IEventProcessorFactory<T extends IEventProcessor> {
+    /**
+     * Called to create an event processor for the given partition.
+     *
+     * If it throws an exception, that causes this event processor host instance to give up ownership of the partition.
+     *
+     * @param context Information about the partition that the event processor will handle events from.
+     * @throws Exception to indicate failure.
+     * @return The event processor object.
+     */
+    public T createEventProcessor(PartitionContext context) throws Exception;
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ILeaseManager.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ILeaseManager.java
new file mode 100644
index 0000000000000..ac6c8831033a5
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/ILeaseManager.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+
+/***
+ * If you wish to have EventProcessorHost store leases somewhere other than Azure Storage,
+ * you can write your own lease manager using this interface.
+ *
+ * The Azure Storage managers use the same storage for both lease and checkpoints, so both
+ * interfaces are implemented by the same class. You are free to do the same thing if you have
+ * a unified store for both types of data.
+ *
+ * This interface does not specify initialization methods because we have no way of knowing what
+ * information your implementation will require. If your implementation needs initialization, you
+ * will have to initialize the instance before passing it to the EventProcessorHost constructor.
+ */
+public interface ILeaseManager {
+    /**
+     * The lease duration is mostly internal to the lease manager implementation but may be needed
+     * by other parts of the event processor host.
+     *
+     * @return Duration of a lease before it expires unless renewed, specified in milliseconds.
+     */
+    public int getLeaseDurationInMilliseconds();
+
+    /**
+     * Does the lease store exist?
+     *
+     * The returned CompletableFuture completes with true if the lease store exists or false if it
+     * does not. It completes exceptionally on error.
+     *
+     * @return CompletableFuture {@literal ->} true if it exists, false if not
+     */
+    public CompletableFuture<Boolean> leaseStoreExists();
+
+    /**
+     * Create the lease store if it does not exist, do nothing if it does exist.
+     *
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> createLeaseStoreIfNotExists();
+
+    /**
+     * Deletes the lease store.
+     *
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> deleteLeaseStore();
+
+    /**
+     * Returns the lease info for the given partition.
+     *
+     * @param partitionId Get the lease info for this partition.
+     * @return CompletableFuture {@literal ->} Lease, completes exceptionally on error.
+     */
+    public CompletableFuture<CompleteLease> getLease(String partitionId);
+
+    /**
+     * Returns lightweight BaseLease for all leases, which includes name of owning host and whether lease
+     * is expired. An implementation is free to return CompleteLease or its own class derived from CompleteLease,
+     * but it is important that getAllLeases run as fast as possible. If it is faster to obtain only the
+     * information required for a BaseLease, we heavily recommend doing that.
+     *
+     * @return CompletableFuture {@literal ->} list of BaseLease, completes exceptionally on error.
+     */
+    public CompletableFuture<List<BaseLease>> getAllLeases();
+
+
+    /**
+     * Create in the store a lease for each of the given partitions, if it does not exist. Do nothing for any
+     * lease which exists in the store already.
+     *
+     * @param partitionIds ids of partitions to create lease info for
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error
+     */
+    public CompletableFuture<Void> createAllLeasesIfNotExists(List<String> partitionIds);
+
+    /**
+     * Delete the lease info for a partition from the store. If there is no stored lease for the given partition,
+     * that is treated as success.
+     *
+     * @param lease the currently existing lease info for the partition
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> deleteLease(CompleteLease lease);
+
+    /**
+     * Acquire the lease on the desired partition for this EventProcessorHost.
+     *
+     * Note that it is legal to acquire a lease that is currently owned by another host, which is called "stealing".
+     * Lease-stealing is how partitions are redistributed when additional hosts are started.
+     *
+     * The existing Azure Storage implementation can experience races between two host instances attempting to acquire or steal
+     * the lease at the same time. To avoid situations where two host instances both believe that they own the lease, acquisition
+     * can fail non-exceptionally by returning false and should do so when there is any doubt -- the worst that can happen is that
+     * no host instance owns the lease for a short time. This is qualitatively different from, for example, the underlying store
+     * throwing an access exception, which is an error and should complete exceptionally.
+     *
+     * @param lease Lease info for the desired partition
+     * @return CompletableFuture {@literal ->} true if the lease was acquired, false if not, completes exceptionally on error.
+     */
+    public CompletableFuture<Boolean> acquireLease(CompleteLease lease);
+
+    /**
+     * Renew a lease currently held by this host instance.
+     *
+     * If the lease has been taken by another host instance (either stolen or after expiration) or explicitly released,
+     * renewLease must return false. With the Azure Storage-based implementation, it IS possible to renew an expired lease
+     * that has not been taken by another host, so your implementation can allow that or not, whichever is convenient. If
+     * it does not, renewLease should return false.
+     *
+     * @param lease Lease to be renewed
+     * @return CompletableFuture {@literal ->} true if the lease was renewed, false as described above, completes exceptionally on error.
+     */
+    public CompletableFuture<Boolean> renewLease(CompleteLease lease);
+
+    /**
+     * Give up a lease currently held by this host.
+     *
+     * If the lease has expired or been taken by another host, releasing it is unnecessary but will succeed since the intent
+     * has been fulfilled.
+     *
+     * @param lease Lease to be given up
+     * @return CompletableFuture {@literal ->} null on success, completes exceptionally on error.
+     */
+    public CompletableFuture<Void> releaseLease(CompleteLease lease);
+
+    /**
+     * Update the store with the information in the provided lease.
+     *
+     * It is necessary to currently hold a lease in order to update it. If the lease has been stolen, or expired, or
+     * released, it cannot be updated. Lease manager implementations should renew the lease before performing the update to avoid lease
+     * expiration during the process.
+     *
+     * @param lease New lease info to be stored
+     * @return CompletableFuture {@literal ->} true if the update was successful, false if lease was lost and could not be updated, completes exceptionally on error.
+     */
+    public CompletableFuture<Boolean> updateLease(CompleteLease lease);
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryCheckpointManager.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryCheckpointManager.java
new file mode 100644
index 0000000000000..29ba81a38e5cc
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryCheckpointManager.java
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+
+/***
+ * An ICheckpointManager implementation based on an in-memory store.
+ *
+ * THIS CLASS IS PROVIDED AS A CONVENIENCE FOR TESTING ONLY. All data stored via this class is in memory
+ * only and not persisted in any way. In addition, it is only visible within the same process: multiple
+ * instances of EventProcessorHost in the same process will share the same in-memory store and checkpoints
+ * created by one will be visible to the others, but that is not true across processes.
+ *
+ * With an ordinary store, there is a clear and distinct line between the values that are persisted
+ * and the values that are live in memory. With an in-memory store, that line gets blurry. If we
+ * accidentally hand out a reference to the in-store object, then the calling code is operating on
+ * the "persisted" values without going through the manager and behavior will be very different.
+ * Hence, the implementation takes pains to distinguish between references to "live" and "persisted"
+ * checkpoints.
+ *
+ * To use this class, create a new instance and pass it to the EventProcessorHost constructor that takes
+ * ICheckpointManager as an argument. After the EventProcessorHost instance is constructed, be sure to
+ * call initialize() on this object before starting processing with EventProcessorHost.registerEventProcessor()
+ * or EventProcessorHost.registerEventProcessorFactory().
+ */
+public class InMemoryCheckpointManager implements ICheckpointManager {
+    private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(InMemoryCheckpointManager.class);
+    private HostContext hostContext;
+
+    public InMemoryCheckpointManager() {
+    }
+
+    // This object is constructed before the EventProcessorHost and passed as an argument to
+    // EventProcessorHost's constructor. So it has to get context info later.
+    public void initialize(HostContext hostContext) {
+        this.hostContext = hostContext;
+    }
+
+    @Override
+    public CompletableFuture<Boolean> checkpointStoreExists() {
+        boolean exists = InMemoryCheckpointStore.singleton.existsMap();
+        TRACE_LOGGER.debug(this.hostContext.withHost("checkpointStoreExists() " + exists));
+        return CompletableFuture.completedFuture(exists);
+    }
+
+    @Override
+    public CompletableFuture<Void> createCheckpointStoreIfNotExists() {
+        TRACE_LOGGER.debug(this.hostContext.withHost("createCheckpointStoreIfNotExists()"));
+        InMemoryCheckpointStore.singleton.initializeMap();
+        return CompletableFuture.completedFuture(null);
+    }
+
+    @Override
+    public CompletableFuture<Void> deleteCheckpointStore() {
+        TRACE_LOGGER.debug(this.hostContext.withHost("deleteCheckpointStore()"));
+        InMemoryCheckpointStore.singleton.deleteMap();
+        return CompletableFuture.completedFuture(null);
+    }
+
+    @Override
+    public CompletableFuture<Checkpoint> getCheckpoint(String partitionId) {
+        Checkpoint returnCheckpoint = null;
+        Checkpoint checkpointInStore = InMemoryCheckpointStore.singleton.getCheckpoint(partitionId);
+        if (checkpointInStore == null) {
+            TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(partitionId,
+                    "getCheckpoint() no existing Checkpoint"));
+            returnCheckpoint = null;
+        } else if (checkpointInStore.getSequenceNumber() == -1) {
+            // Uninitialized, so return null.
+            TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "getCheckpoint() uninitalized"));
+            returnCheckpoint = null;
+        } else {
+            TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId,
+                    "getCheckpoint() found " + checkpointInStore.getOffset() + "//" + checkpointInStore.getSequenceNumber()));
+            returnCheckpoint = new Checkpoint(checkpointInStore);
+        }
+        return CompletableFuture.completedFuture(returnCheckpoint);
+    }
+
+    @Override
+    public CompletableFuture<Void> createAllCheckpointsIfNotExists(List<String> partitionIds) {
+        for (String id : partitionIds) {
+            Checkpoint checkpointInStore = InMemoryCheckpointStore.singleton.getCheckpoint(id);
+            if (checkpointInStore != null) {
+                TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(id,
+                        "createCheckpointIfNotExists() found existing checkpoint, OK"));
+            } else {
+                TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(id,
+                        "createCheckpointIfNotExists() creating new checkpoint"));
+                Checkpoint newStoreCheckpoint = new Checkpoint(id);
+                // This API actually creates the holder, not the checkpoint itself. In this implementation, we do create a Checkpoint object
+                // and put it in the store, but the values are set to indicate that it is not initialized.
+                newStoreCheckpoint.setOffset(null);
+                newStoreCheckpoint.setSequenceNumber(-1);
+                InMemoryCheckpointStore.singleton.setOrReplaceCheckpoint(newStoreCheckpoint);
+            }
+        }
+        return CompletableFuture.completedFuture(null);
+    }
+
+    @Override
+    public CompletableFuture<Void> updateCheckpoint(CompleteLease lease, Checkpoint checkpoint) {
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(checkpoint.getPartitionId(),
+                "updateCheckpoint() " + checkpoint.getOffset() + "//" + checkpoint.getSequenceNumber()));
+        Checkpoint checkpointInStore = InMemoryCheckpointStore.singleton.getCheckpoint(checkpoint.getPartitionId());
+        if (checkpointInStore != null) {
+            checkpointInStore.setOffset(checkpoint.getOffset());
+            checkpointInStore.setSequenceNumber(checkpoint.getSequenceNumber());
+        } else {
+            TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(checkpoint.getPartitionId(),
+                    "updateCheckpoint() can't find checkpoint"));
+        }
+        return CompletableFuture.completedFuture(null);
+    }
+
+    @Override
+    public CompletableFuture<Void> deleteCheckpoint(String partitionId) {
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "deleteCheckpoint()"));
+        InMemoryCheckpointStore.singleton.removeCheckpoint(partitionId);
+        return CompletableFuture.completedFuture(null);
+    }
+
+
+    private static class InMemoryCheckpointStore {
+        final static InMemoryCheckpointStore singleton = new InMemoryCheckpointStore();
+
+        private ConcurrentHashMap<String, Checkpoint> inMemoryCheckpointsPrivate = null;
+
+        synchronized boolean existsMap() {
+            return (this.inMemoryCheckpointsPrivate != null);
+        }
+
+        synchronized void initializeMap() {
+            if (this.inMemoryCheckpointsPrivate == null) {
+                this.inMemoryCheckpointsPrivate = new ConcurrentHashMap<String, Checkpoint>();
+            }
+        }
+
+        synchronized void deleteMap() {
+            this.inMemoryCheckpointsPrivate = null;
+        }
+
+        synchronized Checkpoint getCheckpoint(String partitionId) {
+            return this.inMemoryCheckpointsPrivate.get(partitionId);
+        }
+
+        synchronized void setOrReplaceCheckpoint(Checkpoint newCheckpoint) {
+            this.inMemoryCheckpointsPrivate.put(newCheckpoint.getPartitionId(), newCheckpoint);
+        }
+
+        synchronized void removeCheckpoint(String partitionId) {
+            this.inMemoryCheckpointsPrivate.remove(partitionId);
+        }
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryLeaseManager.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryLeaseManager.java
new file mode 100644
index 0000000000000..3e2a0332f6d74
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/InMemoryLeaseManager.java
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.*;
+
+/***
+ * An ILeaseManager implementation based on an in-memory store.
+ *
+ * THIS CLASS IS PROVIDED AS A CONVENIENCE FOR TESTING ONLY. All data stored via this class is in memory
+ * only and not persisted in any way. In addition, it is only visible within the same process: multiple
+ * instances of EventProcessorHost in the same process will share the same in-memory store and leases
+ * created by one will be visible to the others, but that is not true across processes.
+ *
+ * With an ordinary store, there is a clear and distinct line between the values that are persisted
+ * and the values that are live in memory. With an in-memory store, that line gets blurry. If we
+ * accidentally hand out a reference to the in-store object, then the calling code is operating on
+ * the "persisted" values without going through the manager and behavior will be very different.
+ * Hence, the implementation takes pains to distinguish between references to "live" and "persisted"
+ * leases.
+ *
+ * To use this class, create a new instance and pass it to the EventProcessorHost constructor that takes
+ * ILeaseManager as an argument. After the EventProcessorHost instance is constructed, be sure to
+ * call initialize() on this object before starting processing with EventProcessorHost.registerEventProcessor()
+ * or EventProcessorHost.registerEventProcessorFactory().
+ */
+public class InMemoryLeaseManager implements ILeaseManager {
+    private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(InMemoryLeaseManager.class);
+    // Supplied through initialize(); not set by the constructor.
+    private HostContext hostContext;
+    // Artificial delay in milliseconds applied by latency(); values of 0 or less disable it.
+    private long millisecondsLatency = 0;
+
+    /**
+     * Creates a manager with no host context and no simulated latency.
+     */
+    public InMemoryLeaseManager() {
+    }
+
+    /**
+     * Supplies the host context that the other methods of this manager read.
+     *
+     * The manager is built before the EventProcessorHost and handed to the host's constructor,
+     * so the context cannot be a constructor argument and has to be provided afterwards.
+     *
+     * @param hostContext context of the owning host
+     */
+    public void initialize(HostContext hostContext) {
+        this.hostContext = hostContext;
+    }
+
+    /**
+     * Sets the artificial delay that latency() sleeps for.
+     *
+     * @param milliseconds delay in milliseconds; latency() does not sleep when this is 0 or negative
+     */
+    public void setLatency(long milliseconds) {
+        this.millisecondsLatency = milliseconds;
+    }
+
+    /**
+     * Simulates store latency by sleeping the calling thread for the configured number of milliseconds.
+     * Returns immediately when the configured latency is 0 or negative.
+     *
+     * @param caller label of the calling operation; used only in the message logged when the sleep is interrupted
+     */
+    private void latency(String caller) {
+        if (this.millisecondsLatency > 0) {
+            try {
+                //TRACE_LOGGER.info("sleep " + caller);
+                Thread.sleep(this.millisecondsLatency);
+            } catch (InterruptedException e) {
+                // Cutting the simulated delay short is harmless, but the interrupt itself must not be lost:
+                // restore the flag so code further up the stack can still observe the interruption.
+                Thread.currentThread().interrupt();
+                TRACE_LOGGER.info("sleepFAIL " + caller);
+            }
+        }
+    }
+
+    /**
+     * Converts the lease duration configured in the partition manager options from seconds to milliseconds.
+     *
+     * @return lease duration in milliseconds
+     */
+    @Override
+    public int getLeaseDurationInMilliseconds() {
+        final int leaseSeconds = this.hostContext.getPartitionManagerOptions().getLeaseDurationInSeconds();
+        return leaseSeconds * 1000;
+    }
+
+    /**
+     * Reports whether the shared in-memory lease store currently exists.
+     *
+     * @return an already-completed future holding true when the store exists, false otherwise
+     */
+    @Override
+    public CompletableFuture leaseStoreExists() {
+        final boolean storePresent = InMemoryLeaseStore.singleton.existsMap();
+        latency("leaseStoreExists");
+        TRACE_LOGGER.debug(this.hostContext.withHost("leaseStoreExists() " + storePresent));
+        return CompletableFuture.completedFuture(storePresent);
+    }
+
+    /**
+     * Creates the shared in-memory lease store if needed and records the current lease duration in it.
+     *
+     * @return a future that is already completed with null
+     */
+    @Override
+    public CompletableFuture createLeaseStoreIfNotExists() {
+        TRACE_LOGGER.debug(this.hostContext.withHost("createLeaseStoreIfNotExists()"));
+
+        final int leaseDurationMillis = getLeaseDurationInMilliseconds();
+        InMemoryLeaseStore.singleton.initializeMap(leaseDurationMillis);
+
+        latency("createLeaseStoreIfNotExists");
+        return CompletableFuture.completedFuture(null);
+    }
+
+    /**
+     * Deletes the shared in-memory lease store together with every lease in it.
+     *
+     * @return a future that is already completed with null
+     */
+    @Override
+    public CompletableFuture deleteLeaseStore() {
+        TRACE_LOGGER.debug(this.hostContext.withHost("deleteLeaseStore()"));
+
+        final InMemoryLeaseStore store = InMemoryLeaseStore.singleton;
+        store.deleteMap();
+
+        latency("deleteLeaseStore");
+        return CompletableFuture.completedFuture(null);
+    }
+
+    /**
+     * Looks up the lease for one partition and returns a detached copy of it, so that the caller never
+     * holds a reference to the "persisted" object in the store.
+     *
+     * @param partitionId partition whose lease is wanted
+     * @return an already-completed future holding a copy of the stored lease, or null when the store has
+     *         no lease for the partition
+     */
+    @Override
+    public CompletableFuture getLease(String partitionId) {
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(partitionId, "getLease()"));
+        latency("getLease");
+        InMemoryLease leaseInStore = InMemoryLeaseStore.singleton.getLease(partitionId);
+        // The copy constructor dereferences its argument, so a missing lease must not be passed to it:
+        // report "no such lease" as a null result instead of failing with a NullPointerException.
+        InMemoryLease liveLease = (leaseInStore != null) ? new InMemoryLease(leaseInStore) : null;
+        return CompletableFuture.completedFuture(liveLease);
+    }
+
+ @Override
+ public CompletableFuture> getAllLeases() {
+ ArrayList infos = new ArrayList();
+ for (String id : InMemoryLeaseStore.singleton.getPartitionIds()) {
+ InMemoryLease leaseInStore = InMemoryLeaseStore.singleton.getLease(id);
+ infos.add(new BaseLease(id, leaseInStore.getOwner(), !leaseInStore.isExpiredSync()));
+ }
+ latency("getAllLeasesStateInfo");
+ return CompletableFuture.completedFuture(infos);
+ }
+
+    /**
+     * Ensures a lease exists in the store for each of the given partitions.
+     *
+     * One task per partition is submitted to the host's executor; each task looks the lease up and
+     * stores a fresh one when none is found.
+     *
+     * @param partitionIds ids of the partitions that need leases
+     * @return a future that completes when every per-partition task has completed
+     */
+    @Override
+    public CompletableFuture<Void> createAllLeasesIfNotExists(List<String> partitionIds) {
+        ArrayList<CompletableFuture<CompleteLease>> createFutures = new ArrayList<CompletableFuture<CompleteLease>>();
+
+        // Implemented like this to provide an experience more similar to lease creation in the Storage-based manager.
+        for (String id : partitionIds) {
+            final String workingId = id;
+            CompletableFuture<CompleteLease> oneCreate = CompletableFuture.supplyAsync(() -> {
+                InMemoryLease leaseInStore = InMemoryLeaseStore.singleton.getLease(workingId);
+                InMemoryLease returnLease = null;
+                if (leaseInStore != null) {
+                    TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(workingId,
+                            "createLeaseIfNotExists() found existing lease, OK"));
+                    // Hand back a copy, never the "persisted" object itself.
+                    returnLease = new InMemoryLease(leaseInStore);
+                } else {
+                    TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(workingId,
+                            "createLeaseIfNotExists() creating new lease"));
+                    InMemoryLease newStoreLease = new InMemoryLease(workingId);
+                    InMemoryLeaseStore.singleton.setOrReplaceLease(newStoreLease);
+                    returnLease = new InMemoryLease(newStoreLease);
+                }
+                latency("createLeaseIfNotExists " + workingId);
+                return returnLease;
+            }, this.hostContext.getExecutor());
+            createFutures.add(oneCreate);
+        }
+
+        // allOf() takes an array; the wildcard element type is required because generic arrays cannot be created.
+        CompletableFuture<?>[] dummy = new CompletableFuture<?>[createFutures.size()];
+        return CompletableFuture.allOf(createFutures.toArray(dummy));
+    }
+
+    /**
+     * Removes the given lease from the store; the entry is located by the lease's partition id.
+     *
+     * @param lease lease to delete; must be an InMemoryLease
+     * @return a future that is already completed with null
+     */
+    @Override
+    public CompletableFuture deleteLease(CompleteLease lease) {
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(lease, "deleteLease()"));
+
+        final InMemoryLeaseStore store = InMemoryLeaseStore.singleton;
+        store.removeLease((InMemoryLease) lease);
+
+        latency("deleteLease " + lease.getPartitionId());
+        return CompletableFuture.completedFuture(null);
+    }
+
+    /**
+     * Tries to make this host the owner of the given lease.
+     *
+     * An unowned or expired lease is claimed through the store's atomic acquire. A lease this host
+     * already holds only gets a fresh expiration time. A lease held by another host is stolen. In every
+     * successful case both the persisted lease and the caller's live lease are updated.
+     *
+     * @param lease live lease to acquire; must be an InMemoryLease
+     * @return an already-completed future holding true when the lease was found in the store, false otherwise
+     */
+    @Override
+    public CompletableFuture acquireLease(CompleteLease lease) {
+        InMemoryLease liveLease = (InMemoryLease) lease;
+
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease, "acquireLease()"));
+
+        boolean found;
+        InMemoryLease storedLease = InMemoryLeaseStore.singleton.getLease(liveLease.getPartitionId());
+        if (storedLease == null) {
+            TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(liveLease,
+                    "acquireLease() can't find lease"));
+            found = false;
+        } else {
+            found = true;
+            InMemoryLease claimed = InMemoryLeaseStore.singleton.atomicAquireUnowned(liveLease.getPartitionId(), this.hostContext.getHostName());
+            if (claimed == null) {
+                // The store reports the lease as owned and unexpired: either we hold it already or we steal it.
+                if (!storedLease.isOwnedBy(this.hostContext.getHostName())) {
+                    String previousOwner = storedLease.getOwner();
+                    // Ownership changes on the persisted lease and on the live lease.
+                    InMemoryLeaseStore.singleton.stealLease(storedLease, this.hostContext.getHostName());
+                    liveLease.setOwner(this.hostContext.getHostName());
+                    TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease,
+                            "acquireLease() stole lease from " + previousOwner));
+                } else {
+                    TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease,
+                            "acquireLease() already hold lease"));
+                }
+                // Expiration is pushed out on the persisted lease and on the live lease.
+                long refreshedExpiration = System.currentTimeMillis() + getLeaseDurationInMilliseconds();
+                storedLease.setExpirationTime(refreshedExpiration);
+                liveLease.setExpirationTime(refreshedExpiration);
+            } else {
+                // The atomic acquire already set owner and expiration on the persisted lease; mirror them on the live one.
+                liveLease.setOwner(this.hostContext.getHostName());
+                TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease,
+                        "acquireLease() acquired lease"));
+                liveLease.setExpirationTime(claimed.getExpirationTime());
+            }
+        }
+
+        latency("acquireLease " + lease.getPartitionId());
+        return CompletableFuture.completedFuture(found);
+    }
+
+    /**
+     * Registers a callback that simulates the loss of a receiver when a lease is stolen.
+     *
+     * Real partition pumps get "notified" when another host has stolen their lease because the receiver throws
+     * a ReceiverDisconnectedException. It doesn't matter how many hosts try to steal the lease at the same time,
+     * only one will end up with it and that one will kick the others off via the exclusivity of epoch receivers.
+     * This mechanism simulates that for dummy partition pumps used in testing. If expectedOwner does not currently
+     * own the lease for the given partition, then notifier is called immediately, otherwise it is called whenever
+     * ownership of the lease changes.
+     *
+     * @param expectedOwner host name that is expected to own the lease right now
+     * @param partitionId   partition whose lease is watched
+     * @param notifier      callback to run; its return value is not used, hence the wildcard type
+     */
+    public void notifyOnSteal(String expectedOwner, String partitionId, Callable<?> notifier) {
+        InMemoryLeaseStore.singleton.notifyOnSteal(expectedOwner, partitionId, notifier);
+    }
+
+    /**
+     * Extends the expiration time of a lease that this host owns.
+     *
+     * Only ownership is checked, not expiration, to match AzureStorageCheckpointLeaseManager: renewing
+     * an expired lease succeeds unless some other host has grabbed it already.
+     *
+     * @param lease live lease to renew; must be an InMemoryLease
+     * @return an already-completed future holding true when the lease was renewed, false when it is
+     *         missing from the store or owned by another host
+     */
+    @Override
+    public CompletableFuture renewLease(CompleteLease lease) {
+        InMemoryLease liveLease = (InMemoryLease) lease;
+
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease, "renewLease()"));
+
+        boolean renewed = false;
+        InMemoryLease storedLease = InMemoryLeaseStore.singleton.getLease(liveLease.getPartitionId());
+        if (storedLease == null) {
+            TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(liveLease,
+                    "renewLease() can't find lease"));
+        } else if (!storedLease.isOwnedBy(this.hostContext.getHostName())) {
+            TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease,
+                    "renewLease() not renewed because we don't own lease"));
+        } else {
+            // Expiration is pushed out on the persisted lease and on the live lease.
+            long refreshedExpiration = System.currentTimeMillis() + getLeaseDurationInMilliseconds();
+            storedLease.setExpirationTime(refreshedExpiration);
+            liveLease.setExpirationTime(refreshedExpiration);
+            renewed = true;
+        }
+
+        latency("renewLease " + lease.getPartitionId());
+        return CompletableFuture.completedFuture(renewed);
+    }
+
+    /**
+     * Gives up ownership of a lease.
+     *
+     * When the persisted lease is unexpired and owned by this host, owner and expiration are cleared on
+     * both the persisted and the live lease. When it is expired or owned by someone else, nothing is
+     * changed and the call still succeeds, because the lease is no longer held either way.
+     *
+     * @param lease live lease to release; must be an InMemoryLease
+     * @return a future completed with null, or completed exceptionally when the store has no lease
+     *         for the partition
+     */
+    @Override
+    public CompletableFuture releaseLease(CompleteLease lease) {
+        InMemoryLease liveLease = (InMemoryLease) lease;
+
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease, "releaseLease()"));
+
+        CompletableFuture outcome;
+        InMemoryLease storedLease = InMemoryLeaseStore.singleton.getLease(liveLease.getPartitionId());
+        if (storedLease == null) {
+            TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(liveLease, "releaseLease() can't find lease in store"));
+            outcome = new CompletableFuture();
+            outcome.completeExceptionally(new CompletionException(new RuntimeException("releaseLease can't find lease in store for " + liveLease.getPartitionId())));
+        } else {
+            outcome = CompletableFuture.completedFuture(null);
+            if (!storedLease.isExpiredSync() && storedLease.isOwnedBy(this.hostContext.getHostName())) {
+                TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease, "releaseLease() released OK"));
+                // Owner and expiration are cleared on the persisted lease and on the live lease.
+                storedLease.setOwner("");
+                liveLease.setOwner("");
+                storedLease.setExpirationTime(0);
+                liveLease.setExpirationTime(0);
+            }
+            // Otherwise the lease was lost already, so the intent of the release is achieved.
+        }
+
+        latency("releaseLease " + lease.getPartitionId());
+        return outcome;
+    }
+
+    /**
+     * Writes the epoch of the live lease to the persisted lease.
+     *
+     * The lease is renewed first so that it does not expire in the middle of the update. The epoch is
+     * only copied when the renewal succeeded and the persisted lease is still unexpired and owned by
+     * this host. Expiration time is never copied here; Acquire/Renew/Release manage it directly.
+     *
+     * @param lease live lease carrying the new epoch; must be an InMemoryLease
+     * @return a future holding true when the persisted lease was updated, false otherwise
+     */
+    @Override
+    public CompletableFuture updateLease(CompleteLease lease) {
+        InMemoryLease liveLease = (InMemoryLease) lease;
+
+        TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease, "updateLease()"));
+
+        return renewLease(liveLease).thenApply((renewed) ->
+        {
+            if (renewed) {
+                InMemoryLease storedLease = InMemoryLeaseStore.singleton.getLease(liveLease.getPartitionId());
+                if (storedLease == null) {
+                    TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(liveLease,
+                            "updateLease() can't find lease"));
+                    renewed = false;
+                } else if (!storedLease.isExpiredSync() && storedLease.isOwnedBy(this.hostContext.getHostName())) {
+                    // The new value already lives in the live lease, so only the persisted lease needs it.
+                    storedLease.setEpoch(liveLease.getEpoch());
+                } else {
+                    TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(liveLease,
+                            "updateLease() not updated because we don't own lease"));
+                    renewed = false;
+                }
+            }
+            latency("updateLease " + lease.getPartitionId());
+            return renewed;
+        });
+    }
+
+
+ private static class InMemoryLeaseStore {
+ final static InMemoryLeaseStore singleton = new InMemoryLeaseStore();
+ private static int leaseDurationInMilliseconds;
+
+ private ConcurrentHashMap inMemoryLeasesPrivate = null;
+ private ConcurrentHashMap> notifiers = new ConcurrentHashMap>();
+
+ synchronized boolean existsMap() {
+ return (this.inMemoryLeasesPrivate != null);
+ }
+
+ synchronized void initializeMap(int leaseDurationInMilliseconds) {
+ if (this.inMemoryLeasesPrivate == null) {
+ this.inMemoryLeasesPrivate = new ConcurrentHashMap();
+ }
+ InMemoryLeaseStore.leaseDurationInMilliseconds = leaseDurationInMilliseconds;
+ }
+
+ synchronized void deleteMap() {
+ this.inMemoryLeasesPrivate = null;
+ }
+
+ synchronized InMemoryLease getLease(String partitionId) {
+ return this.inMemoryLeasesPrivate.get(partitionId);
+ }
+
+ synchronized List getPartitionIds() {
+ ArrayList ids = new ArrayList();
+ this.inMemoryLeasesPrivate.keySet().forEach((key) ->
+ {
+ ids.add(key);
+ });
+ return ids;
+ }
+
+ synchronized InMemoryLease atomicAquireUnowned(String partitionId, String newOwner) {
+ InMemoryLease leaseInStore = getLease(partitionId);
+ if (leaseInStore.isExpiredSync() || (leaseInStore.getOwner() == null) || leaseInStore.getOwner().isEmpty()) {
+ leaseInStore.setOwner(newOwner);
+ leaseInStore.setExpirationTime(System.currentTimeMillis() + InMemoryLeaseStore.leaseDurationInMilliseconds);
+ } else {
+ // Return null if it was already owned
+ leaseInStore = null;
+ }
+ return leaseInStore;
+ }
+
+ synchronized void notifyOnSteal(String expectedOwner, String partitionId, Callable> notifier) {
+ InMemoryLease leaseInStore = getLease(partitionId);
+ if (!leaseInStore.isOwnedBy(expectedOwner)) {
+ // Already stolen.
+ try {
+ notifier.call();
+ } catch (Exception e) {
+ }
+ } else {
+ this.notifiers.put(partitionId, notifier);
+ }
+ }
+
+ synchronized void stealLease(InMemoryLease stealee, String newOwner) {
+ stealee.setOwner(newOwner);
+ Callable> notifier = this.notifiers.get(stealee.getPartitionId());
+ if (notifier != null) {
+ try {
+ notifier.call();
+ } catch (Exception e) {
+ }
+ }
+ }
+
+ synchronized void setOrReplaceLease(InMemoryLease newLease) {
+ this.inMemoryLeasesPrivate.put(newLease.getPartitionId(), newLease);
+ }
+
+ synchronized void removeLease(InMemoryLease goneLease) {
+ this.inMemoryLeasesPrivate.remove(goneLease.getPartitionId());
+ }
+ }
+
+
+    /**
+     * Lease representation used by the in-memory store: a CompleteLease plus an absolute expiration
+     * time in milliseconds, compared against System.currentTimeMillis().
+     */
+    private static class InMemoryLease extends CompleteLease {
+        private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(InMemoryLease.class);
+        // Absolute time in milliseconds; the initial 0 makes a new lease count as expired.
+        private long expirationTimeMillis = 0;
+
+        /**
+         * Creates a lease for the partition with epoch 0 and expiration time 0.
+         *
+         * @param partitionId partition the lease belongs to
+         */
+        InMemoryLease(String partitionId) {
+            super(partitionId);
+            this.epoch = 0;
+        }
+
+        /**
+         * Copy constructor; used to separate "live" leases from "persisted" ones.
+         *
+         * @param source lease to copy; must not be null
+         */
+        InMemoryLease(InMemoryLease source) {
+            super(source);
+            this.expirationTimeMillis = source.expirationTimeMillis;
+            this.epoch = source.epoch;
+        }
+
+        /** @return the absolute expiration time in milliseconds */
+        long getExpirationTime() {
+            return this.expirationTimeMillis;
+        }
+
+        /** @param expireAtMillis new absolute expiration time in milliseconds */
+        void setExpirationTime(long expireAtMillis) {
+            this.expirationTimeMillis = expireAtMillis;
+        }
+
+        /**
+         * Checks the expiration time against the current clock and logs the outcome.
+         *
+         * The owner is deliberately left untouched on expiry, to match AzureStorageCheckpointLeaseManager:
+         * an expired lease can be renewed by the previous owner, which requires the owner field to
+         * remain unchanged.
+         *
+         * @return true when the current time has reached or passed the expiration time
+         */
+        public boolean isExpiredSync() {
+            final boolean hasExpired = (this.expirationTimeMillis <= System.currentTimeMillis());
+            final String stateLabel = hasExpired ? ") expired " : ") leased ";
+            TRACE_LOGGER.debug("isExpired(" + this.getPartitionId() + stateLabel + (this.expirationTimeMillis - System.currentTimeMillis()));
+            return hasExpired;
+        }
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LeaseLostException.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LeaseLostException.java
new file mode 100644
index 0000000000000..ed9482db73592
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LeaseLostException.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * Checked exception that carries the lease involved, exposing only its partition id publicly.
+ */
+public class LeaseLostException extends Exception {
+    private static final long serialVersionUID = -4625001822439809869L;
+
+    // Kept private: the lease type is not meant to be exposed to the public.
+    private final BaseLease lease;
+
+    /**
+     * Creates the exception with a cause and no message.
+     *
+     * @param lease lease this exception is about
+     * @param cause underlying failure
+     */
+    LeaseLostException(BaseLease lease, Throwable cause) {
+        super(null, cause);
+        this.lease = lease;
+    }
+
+    /**
+     * Creates the exception with a message and an explicitly null cause.
+     *
+     * @param lease   lease this exception is about
+     * @param message description of the failure
+     */
+    LeaseLostException(BaseLease lease, String message) {
+        super(message, null);
+        this.lease = lease;
+    }
+
+    /**
+     * @return the partition id of the lease this exception was created with
+     */
+    public String getPartitionId() {
+        return this.lease.getPartitionId();
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LoggingUtils.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LoggingUtils.java
new file mode 100644
index 0000000000000..e26d9e8778bd1
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/LoggingUtils.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ThreadPoolExecutor;
+
+/**
+ * Centralize log message generation
+ */
+public final class LoggingUtils {
+    /**
+     * Wraps a failure, tagged with the action that was in progress, in a CompletionException.
+     *
+     * @param e      the original failure
+     * @param action description of the action that failed
+     * @return a CompletionException whose cause is an ExceptionWithAction built from the arguments
+     */
+    static CompletionException wrapException(Throwable e, String action) {
+        final ExceptionWithAction tagged = new ExceptionWithAction(e, action);
+        return new CompletionException(tagged);
+    }
+
+    /**
+     * Same as wrapException, with an additional message passed to the ExceptionWithAction.
+     *
+     * @param e       the original failure
+     * @param message message for the ExceptionWithAction
+     * @param action  description of the action that failed
+     * @return a CompletionException whose cause is an ExceptionWithAction built from the arguments
+     */
+    static CompletionException wrapExceptionWithMessage(Throwable e, String message, String action) {
+        final ExceptionWithAction tagged = new ExceptionWithAction(e, message, action);
+        return new CompletionException(tagged);
+    }
+
+    /**
+     * Peels ExecutionException, CompletionException and ExceptionWithAction layers off a throwable.
+     *
+     * Unwrapping stops at the first throwable that is not one of those wrappers, or at a wrapper whose
+     * cause is missing or is not an Exception; in the latter case the wrapper itself is returned.
+     *
+     * @param wrapped   throwable to unwrap
+     * @param outAction receives the action string of every ExceptionWithAction layer encountered;
+     *                  can be null if you don't care about any action string
+     * @return the innermost throwable reached
+     */
+    static Throwable unwrapException(Throwable wrapped, StringBuilder outAction) {
+        Throwable current = wrapped;
+
+        while (isWrapper(current)) {
+            if ((outAction != null) && (current instanceof ExceptionWithAction)) {
+                // Save the action string from an ExceptionWithAction, if desired.
+                outAction.append(((ExceptionWithAction) current).getAction());
+            }
+
+            final Throwable cause = current.getCause();
+            if (!(cause instanceof Exception)) {
+                // instanceof is false for null, so this covers both "no cause" and "cause is not an Exception".
+                break;
+            }
+            current = cause;
+        }
+
+        return current;
+    }
+
+    // True for the wrapper types that unwrapException() looks through.
+    private static boolean isWrapper(Throwable candidate) {
+        return (candidate instanceof ExecutionException)
+                || (candidate instanceof CompletionException)
+                || (candidate instanceof ExceptionWithAction);
+    }
+
+    /**
+     * Formats the settings and current state of a thread pool for logging.
+     *
+     * @param hostName   not used by this method
+     * @param threadPool pool to describe; must not be null
+     * @return a one-line report for a ThreadPoolExecutor, or a message naming the pool's class otherwise
+     */
+    static String threadPoolStatusReport(String hostName, ScheduledExecutorService threadPool) {
+        if (!(threadPool instanceof ThreadPoolExecutor)) {
+            return "Cannot report on thread pool of type " + threadPool.getClass().toString();
+        }
+
+        final ThreadPoolExecutor pool = (ThreadPoolExecutor) threadPool;
+        return new StringBuilder()
+                .append("Thread pool settings: core: ").append(pool.getCorePoolSize())
+                .append(" active: ").append(pool.getActiveCount())
+                .append(" current: ").append(pool.getPoolSize())
+                .append(" largest: ").append(pool.getLargestPoolSize())
+                .append(" max: ").append(pool.getMaximumPoolSize())
+                .append(" policy: ").append(pool.getRejectedExecutionHandler().getClass().toString())
+                .append(" queue avail: ").append(pool.getQueue().remainingCapacity())
+                .toString();
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionContext.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionContext.java
new file mode 100644
index 0000000000000..601baab2d469e
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionContext.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.EventData;
+import com.microsoft.azure.eventhubs.EventPosition;
+import com.microsoft.azure.eventhubs.ReceiverRuntimeInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.function.Function;
+
+/***
+ * PartitionContext is used to provide partition-related information to the methods of IEventProcessor,
+ * particularly onEvents where the user's event-processing logic lives. It also allows the user to
+ * persist checkpoints for the partition, which determine where event processing will begin if the
+ * event processor for that partition must be restarted, such as if ownership of the partition moves
+ * from one event processor host instance to another.
+ */
+public class PartitionContext {
+    private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(PartitionContext.class);
+    private final HostContext hostContext;
+    private final String partitionId;
+    // Not set by the constructor; supplied later through setLease().
+    private CompleteLease lease;
+    // Null until a checkpoint is loaded or an event is recorded; checkpoint() refuses to run while it is null.
+    private String offset = null;
+    private long sequenceNumber = 0;
+    private ReceiverRuntimeInformation runtimeInformation;
+
+    PartitionContext(HostContext hostContext, String partitionId) {
+        this.hostContext = hostContext;
+        this.partitionId = partitionId;
+
+        this.runtimeInformation = new ReceiverRuntimeInformation(partitionId);
+    }
+
+    /***
+     * Get the name of the consumer group that is being received from.
+     *
+     * @return consumer group name
+     */
+    public String getConsumerGroupName() {
+        return this.hostContext.getConsumerGroupName();
+    }
+
+    /***
+     * Get the path of the event hub that is being received from.
+     *
+     * @return event hub path
+     */
+    public String getEventHubPath() {
+        return this.hostContext.getEventHubPath();
+    }
+
+    /***
+     * Get the name of the event processor host instance.
+     *
+     * The value is read from the current lease, so a lease must have been set on this context.
+     *
+     * @return event processor host instance name
+     */
+    public String getOwner() {
+        return this.lease.getOwner();
+    }
+
+    /***
+     * If receiver runtime metrics have been enabled in EventProcessorHost, this method
+     * gets the metrics as they come in.
+     *
+     * @return See ReceiverRuntimeInformation.
+     */
+    public ReceiverRuntimeInformation getRuntimeInformation() {
+        return this.runtimeInformation;
+    }
+
+    void setRuntimeInformation(ReceiverRuntimeInformation value) {
+        this.runtimeInformation = value;
+    }
+
+    CompleteLease getLease() {
+        return this.lease;
+    }
+
+    // Unlike other properties which are immutable after creation, the lease is updated dynamically and needs a setter.
+    void setLease(CompleteLease lease) {
+        this.lease = lease;
+    }
+
+    // Records the position of the given event as the latest one seen. An event whose sequence number is
+    // lower than the recorded one is ignored (and logged), so the position never moves backwards.
+    void setOffsetAndSequenceNumber(EventData event) {
+        if (event.getSystemProperties().getSequenceNumber() >= this.sequenceNumber) {
+            this.offset = event.getSystemProperties().getOffset();
+            this.sequenceNumber = event.getSystemProperties().getSequenceNumber();
+        } else {
+            TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionId,
+                    "setOffsetAndSequenceNumber(" + event.getSystemProperties().getOffset() + "//" +
+                            event.getSystemProperties().getSequenceNumber() + ") would move backwards, ignoring"));
+        }
+    }
+
+    /***
+     * Get the id of the partition being received from.
+     *
+     * @return partition id
+     */
+    public String getPartitionId() {
+        return this.partitionId;
+    }
+
+    // Fetches the stored checkpoint for this partition from the checkpoint manager and converts it to
+    // the EventPosition at which receiving should start (see checkpointToOffset).
+    CompletableFuture getInitialOffset() {
+        return this.hostContext.getCheckpointManager().getCheckpoint(this.partitionId)
+                .thenApply((startingCheckpoint) ->
+                {
+                    return checkpointToOffset(startingCheckpoint);
+                });
+    }
+
+    // Converts a stored checkpoint to a starting EventPosition. With a checkpoint, its offset and
+    // sequence number are also recorded on this context. Without one (null), the user-provided
+    // initial position provider decides and this context's offset stays null.
+    EventPosition checkpointToOffset(Checkpoint startingCheckpoint) {
+        EventPosition startAt = null;
+        if (startingCheckpoint == null) {
+            // No checkpoint was ever stored. Use the initialOffsetProvider instead.
+            // Typed Function: with a raw Function, apply() yields Object, which cannot be assigned to startAt.
+            Function<String, EventPosition> initialPositionProvider = this.hostContext.getEventProcessorOptions().getInitialPositionProvider();
+            TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(this.partitionId, "Calling user-provided initial position provider"));
+            startAt = initialPositionProvider.apply(this.partitionId);
+            // Leave this.offset as null. The initialPositionProvider cannot provide enough information to write a valid checkpoint:
+            // at most it will give one of offset or sequence number, and if it is a starting time then it doesn't have either.
+            TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionId, "Initial position provided: " + startAt));
+        } else {
+            // Checkpoint is valid, use it.
+            this.offset = startingCheckpoint.getOffset();
+            startAt = EventPosition.fromOffset(this.offset);
+            this.sequenceNumber = startingCheckpoint.getSequenceNumber();
+            TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionId, "Retrieved starting offset " + this.offset + "//" + this.sequenceNumber));
+        }
+
+        return startAt;
+    }
+
+    /**
+     * Writes the position of the last event in the current batch to the checkpoint store via the checkpoint manager.
+     *
+     * It is important to check the result in order to detect failures.
+     *
+     * If receiving started from a user-provided EventPosition and no events have been received yet,
+     * then this will fail. (This scenario is possible when invoke-after-receive-timeout has been set
+     * in EventProcessorOptions.)
+     *
+     * @return CompletableFuture {@literal ->} null when the checkpoint has been persisted successfully, completes exceptionally on error.
+     */
+    public CompletableFuture checkpoint() {
+        CompletableFuture result = null;
+        if (this.offset == null) {
+            result = new CompletableFuture();
+            result.completeExceptionally(new RuntimeException("Cannot checkpoint until at least one event has been received on this partition"));
+        } else {
+            // Capture the current position in a Checkpoint object before handing it on.
+            Checkpoint capturedCheckpoint = new Checkpoint(this.partitionId, this.offset, this.sequenceNumber);
+            result = checkpoint(capturedCheckpoint);
+        }
+        return result;
+    }
+
+    /**
+     * Writes the position of the provided EventData instance to the checkpoint store via the checkpoint manager.
+     *
+     * It is important to check the result in order to detect failures.
+     *
+     * @param event A received EventData
+     * @return CompletableFuture {@literal ->} null when the checkpoint has been persisted successfully, completes exceptionally on error.
+     */
+    public CompletableFuture checkpoint(EventData event) {
+        CompletableFuture result = null;
+        if (event == null) {
+            result = new CompletableFuture();
+            result.completeExceptionally(new IllegalArgumentException("Cannot checkpoint with null EventData"));
+        } else {
+            result = checkpoint(new Checkpoint(this.partitionId, event.getSystemProperties().getOffset(), event.getSystemProperties().getSequenceNumber()));
+        }
+        return result;
+    }
+
+    /**
+     * Writes the position of the provided Checkpoint instance to the checkpoint store via the checkpoint manager.
+     *
+     * It is important to check the result in order to detect failures.
+     *
+     * @param checkpoint a checkpoint
+     * @return CompletableFuture {@literal ->} null when the checkpoint has been persisted successfully, completes exceptionally on error.
+     */
+    public CompletableFuture checkpoint(Checkpoint checkpoint) {
+        CompletableFuture result = null;
+        if (checkpoint == null) {
+            result = new CompletableFuture();
+            result.completeExceptionally(new IllegalArgumentException("Cannot checkpoint with null Checkpoint"));
+        } else {
+            TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(checkpoint.getPartitionId(),
+                    "Saving checkpoint: " + checkpoint.getOffset() + "//" + checkpoint.getSequenceNumber()));
+            result = this.hostContext.getCheckpointManager().updateCheckpoint(this.lease, checkpoint);
+        }
+        return result;
+    }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManager.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManager.java
new file mode 100644
index 0000000000000..ffad0a9de9305
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManager.java
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.EventHubClient;
+import com.microsoft.azure.eventhubs.EventHubException;
+import com.microsoft.azure.eventhubs.EventHubRuntimeInformation;
+import com.microsoft.azure.eventhubs.IllegalEntityException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.*;
+
+class PartitionManager extends Closable {
+ private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(PartitionManager.class);
+ // Protected instead of private for testability
+ protected final HostContext hostContext;
+ // Lock object guarding reads and writes of scanFuture.
+ final private Object scanFutureSynchronizer = new Object();
+ // Number of attempts passed to buildRetries() for each store-initialization step.
+ private final int retryMax = 5;
+ // Assigned in initialize() from createPumpTestHook(); null until then.
+ protected PumpManager pumpManager = null;
+ // Filled in by cachePartitionIds(); null until the partition ids have been obtained.
+ protected volatile String partitionIds[] = null;
+ // Handle of the most recently scheduled lease scan, kept so that stopPartitions() can cancel it.
+ private ScheduledFuture<?> scanFuture = null;
+
+ // Creates a partition manager bound to the given host context.
+ // null is passed to the Closable constructor -- presumably meaning "no parent"; confirm against Closable.
+ PartitionManager(HostContext hostContext) {
+ super(null);
+ this.hostContext = hostContext;
+ }
+
+ // Obtains the event hub's partition ids and stores them in this.partitionIds.
+ // When the ids are already cached, an already-completed future is returned and no client is created.
+ // Otherwise the returned future completes with null once the ids are stored, or fails with an
+ // IllegalEntityException (wrapped in a CompletionException when the failure happens inside the chain).
+ CompletableFuture cachePartitionIds() {
+ CompletableFuture retval = null;
+
+ if (this.partitionIds != null) {
+ retval = CompletableFuture.completedFuture(null);
+ } else {
+ // This try-catch is necessary because EventHubClient.create can directly throw
+ // EventHubException or IOException, in addition to whatever failures may occur when the result of
+ // the CompletableFuture is evaluated.
+ try {
+ // Side channel used only to trigger closing of the temporary client; completed in stage 3.
+ final CompletableFuture cleanupFuture = new CompletableFuture();
+
+ // Stage 0A: get EventHubClient for the event hub
+ retval = EventHubClient.create(this.hostContext.getEventHubConnectionString(), this.hostContext.getRetryPolicy(), this.hostContext.getExecutor())
+ // Stage 0B: set up a way to close the EventHubClient when we're done
+ .thenApplyAsync((ehClient) ->
+ {
+ final EventHubClient saveForCleanupClient = ehClient;
+ // The close is chained onto cleanupFuture, so it runs only after stage 3 completes that future.
+ cleanupFuture.thenComposeAsync((empty) -> saveForCleanupClient.close(), this.hostContext.getExecutor());
+ return ehClient;
+ }, this.hostContext.getExecutor())
+ // Stage 1: use the client to get runtime info for the event hub
+ .thenComposeAsync((ehClient) -> ehClient.getRuntimeInformation(), this.hostContext.getExecutor())
+ // Stage 2: extract the partition ids from the runtime info or throw on null (timeout)
+ .thenAcceptAsync((EventHubRuntimeInformation ehInfo) ->
+ {
+ if (ehInfo != null) {
+ this.partitionIds = ehInfo.getPartitionIds();
+
+ TRACE_LOGGER.info(this.hostContext.withHost("Eventhub " + this.hostContext.getEventHubPath() + " count of partitions: " + ehInfo.getPartitionCount()));
+ for (String id : this.partitionIds) {
+ TRACE_LOGGER.info(this.hostContext.withHost("Found partition with id: " + id));
+ }
+ } else {
+ throw new CompletionException(new TimeoutException("getRuntimeInformation returned null"));
+ }
+ }, this.hostContext.getExecutor())
+ // Stage 3: RUN REGARDLESS OF EXCEPTIONS -- if there was an error, wrap it in IllegalEntityException and throw
+ .handleAsync((empty, e) ->
+ {
+ cleanupFuture.complete(null); // trigger client cleanup
+ if (e != null) {
+ // Strip one CompletionException layer so the IllegalEntityException carries the underlying cause.
+ Throwable notifyWith = e;
+ if (e instanceof CompletionException) {
+ notifyWith = e.getCause();
+ }
+ throw new CompletionException(new IllegalEntityException("Failure getting partition ids for event hub", notifyWith));
+ }
+ return null;
+ }, this.hostContext.getExecutor());
+ } catch (EventHubException | IOException e) {
+ // EventHubClient.create threw synchronously: report it through a failed future instead of throwing.
+ retval = new CompletableFuture();
+ retval.completeExceptionally(new IllegalEntityException("Failure getting partition ids for event hub", e));
+ }
+ }
+
+ return retval;
+ }
+
+ // Testability hook: creates the PumpManager that initialize() stores in this.pumpManager.
+ // A test subclass can override it to insert a dummy pump.
+ PumpManager createPumpTestHook() {
+ return new PumpManager(this.hostContext, this);
+ }
+
+ // Testability hook: called by initialize() after the stores are initialized and the first scan is scheduled.
+ // Does nothing here.
+ void onInitializeCompleteTestHook() {
+ }
+
+ // Testability hook: called by scan() when a lease scan has completed, before the next scan is scheduled.
+ // Does nothing here.
+ void onPartitionCheckCompleteTestHook() {
+ }
+
+ // Shuts the partition manager down: marks it as closing, cancels a pending lease scan, and stops all pumps.
+ // The final stage of the returned future marks the manager as closed.
+ CompletableFuture stopPartitions() {
+ setClosing();
+
+ // A lease scan that is waiting for its next run must not start again.
+ synchronized (this.scanFutureSynchronizer) {
+ if (this.scanFuture != null) {
+ this.scanFuture.cancel(true);
+ }
+ }
+
+ final CompletableFuture pumpsStopped;
+ if (this.pumpManager == null) {
+ // No pump manager exists, so there are no pumps to shut down.
+ pumpsStopped = CompletableFuture.completedFuture(null);
+ } else {
+ TRACE_LOGGER.info(this.hostContext.withHost("Shutting down all pumps"));
+ pumpsStopped = this.pumpManager.removeAllPumps(CloseReason.Shutdown)
+ .whenCompleteAsync((unused, failure) -> reportShutdownFailure(failure), this.hostContext.getExecutor());
+ }
+
+ // Runs whether or not stopping the pumps succeeded.
+ return pumpsStopped.whenCompleteAsync((unused, failure) -> {
+ TRACE_LOGGER.info(this.hostContext.withHost("Partition manager exiting"));
+ setClosed();
+ }, this.hostContext.getExecutor());
+ }
+
+ // Traces a pump-shutdown failure and, when it is an Exception, passes it to notifyOfException.
+ // Does nothing when failure is null.
+ private void reportShutdownFailure(Throwable failure) {
+ if (failure == null) {
+ return;
+ }
+ Throwable notifyWith = LoggingUtils.unwrapException(failure, null);
+ TRACE_LOGGER.warn(this.hostContext.withHost("Failure during shutdown"), notifyWith);
+ if (notifyWith instanceof Exception) {
+ this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), (Exception) notifyWith,
+ EventProcessorHostActionStrings.PARTITION_MANAGER_CLEANUP);
+ }
+ }
+
+ // Starts the partition manager: creates the pump manager, caches the partition ids, initializes the stores,
+ // and schedules the first lease scan. The returned future fails if any of the steps before the scan fails.
+ public CompletableFuture initialize() {
+ this.pumpManager = createPumpTestHook();
+
+ return cachePartitionIds()
+ // Runs only when the partition ids were obtained.
+ .thenComposeAsync((ignored) -> initializeStores(), this.hostContext.getExecutor())
+ // Runs on success and on failure; a failure is traced and stays in the chain.
+ .whenCompleteAsync((ignored, failure) -> traceInitializeFailure(failure), this.hostContext.getExecutor())
+ // Runs only when every earlier stage succeeded.
+ .thenRunAsync(this::scheduleFirstScan, this.hostContext.getExecutor());
+ }
+
+ // Traces why store initialization failed, naming the failed action when one is available.
+ // Does nothing when failure is null.
+ private void traceInitializeFailure(Throwable failure) {
+ if (failure == null) {
+ return;
+ }
+ StringBuilder outAction = new StringBuilder();
+ Throwable notifyWith = LoggingUtils.unwrapException(failure, outAction);
+ String message = (outAction.length() > 0)
+ ? "Exception while initializing stores (" + outAction.toString() + "), not starting partition manager"
+ : "Exception while initializing stores, not starting partition manager";
+ TRACE_LOGGER.error(this.hostContext.withHost(message), notifyWith);
+ }
+
+ // Schedules the first lease scan with no delay, then calls the initialization test hook.
+ private void scheduleFirstScan() {
+ synchronized (this.scanFutureSynchronizer) {
+ TRACE_LOGGER.debug(this.hostContext.withHost("Scheduling lease scanner first pass"));
+ this.scanFuture = this.hostContext.getExecutor().schedule(() -> scan(true), 0, TimeUnit.SECONDS);
+ }
+
+ onInitializeCompleteTestHook();
+ }
+
+ // Builds the chain that prepares, in order, the lease store, the checkpoint store, the leases, and the
+ // checkpoint holders. Each step gets this.retryMax attempts through buildRetries(), and a step is not
+ // attempted once an earlier step has run out of attempts. On such a failure the returned future completes
+ // exceptionally with the CompletionException carried inside the FinalException raised by buildRetries().
+ private CompletableFuture<?> initializeStores() {
+ ILeaseManager leaseManager = this.hostContext.getLeaseManager();
+ ICheckpointManager checkpointManager = this.hostContext.getCheckpointManager();
+
+ // let R = this.retryMax
+ // Stages 0 to R: create lease store if it doesn't exist
+ CompletableFuture<?> initializeStoresFuture = buildRetries(CompletableFuture.completedFuture(null),
+ () -> leaseManager.createLeaseStoreIfNotExists(), "Failure creating lease store for this Event Hub, retrying",
+ "Out of retries creating lease store for this Event Hub", EventProcessorHostActionStrings.CREATING_LEASE_STORE, this.retryMax);
+
+ // Stages R+1 to 2R: create checkpoint store if it doesn't exist
+ initializeStoresFuture = buildRetries(initializeStoresFuture, () -> checkpointManager.createCheckpointStoreIfNotExists(),
+ "Failure creating checkpoint store for this Event Hub, retrying", "Out of retries creating checkpoint store for this Event Hub",
+ EventProcessorHostActionStrings.CREATING_CHECKPOINT_STORE, this.retryMax);
+
+ // Stages 2R+1 to 3R: create leases if they don't exist
+ initializeStoresFuture = buildRetries(initializeStoresFuture, () -> leaseManager.createAllLeasesIfNotExists(Arrays.asList(this.partitionIds)),
+ "Failure creating leases, retrying", "Out of retries creating leases", EventProcessorHostActionStrings.CREATING_LEASES, this.retryMax);
+
+ // Stages 3R+1 to 4R: create checkpoint holders if they don't exist
+ initializeStoresFuture = buildRetries(initializeStoresFuture, () -> checkpointManager.createAllCheckpointsIfNotExists(Arrays.asList(this.partitionIds)),
+ "Failure creating checkpoint holders, retrying", "Out of retries creating checkpoint holders",
+ EventProcessorHostActionStrings.CREATING_CHECKPOINTS, this.retryMax);
+
+ // Final stage: translate the outcome for the caller. This must be handleAsync, and its result must be the
+ // future that is returned. whenCompleteAsync keeps the original exception even when its action throws,
+ // and a stage whose result is discarded cannot change what the caller observes.
+ return initializeStoresFuture.handleAsync((r, e) ->
+ {
+ // If an exception has propagated this far, it should be a FinalException, which is guaranteed to contain a CompletionException.
+ // Unwrap it so we don't leak a private type.
+ if (e instanceof FinalException) {
+ throw ((FinalException) e).getInner();
+ }
+ // Any other failure is passed along, wrapped only if it is not already a CompletionException.
+ if (e != null) {
+ throw (e instanceof CompletionException) ? (CompletionException) e : new CompletionException(e);
+ }
+
+ // Otherwise, allow the existing result to pass to the caller.
+ return (Object) r;
+ }, this.hostContext.getExecutor());
+ }
+
+ // Appends a retrying step to buildOnto and returns the extended chain.
+ //
+ // The lambda is first attempted when buildOnto completes successfully and is attempted again after each
+ // failure, for at most maxRetries attempts in total. If the lambda succeeds, then it will not be invoked
+ // again by following stages. When the attempts run out, the returned CompletableFuture completes
+ // exceptionally with a FinalException built from finalFailureMessage and action. A FinalException arriving
+ // from buildOnto passes through every stage untouched and the lambda is never invoked.
+ //
+ // retryMessage is traced each time an attempt fails and another attempt will follow.
+ private CompletableFuture<?> buildRetries(CompletableFuture<?> buildOnto, Callable<CompletableFuture<?>> lambda, String retryMessage,
+ String finalFailureMessage, String action, int maxRetries) {
+ // Stage 0: first attempt
+ CompletableFuture<?> retryChain = buildOnto.thenComposeAsync((unused) ->
+ {
+ CompletableFuture<?> newresult = CompletableFuture.completedFuture(null);
+ try {
+ newresult = lambda.call();
+ } catch (Exception e1) {
+ // The lambda threw synchronously; surface it as a failure of this stage.
+ throw new CompletionException(e1);
+ }
+ return newresult;
+ }, this.hostContext.getExecutor());
+
+ // The loop starts at 1 because stage 0 already made the first attempt.
+ for (int i = 1; i < maxRetries; i++) {
+ retryChain = retryChain
+ // Stages 1, 3, 5, etc: trace errors but stop normal exception propagation in order to keep going.
+ // Either return null if we don't have a valid result, or pass the result along to the next stage.
+ // FinalExceptions are passed along also so that fatal error earlier in the chain aren't lost.
+ .handleAsync((r, e) ->
+ {
+ Object effectiveResult = r;
+ if (e != null) {
+ if (e instanceof FinalException) {
+ // Propagate FinalException up to the end
+ throw (FinalException) e;
+ } else {
+ TRACE_LOGGER.warn(this.hostContext.withHost(retryMessage), LoggingUtils.unwrapException(e, null));
+ }
+ } else {
+ // Some lambdas return null on success. Change to TRUE to skip retrying.
+ if (r == null) {
+ effectiveResult = true;
+ }
+ }
+ return (e == null) ? effectiveResult : null; // stop propagation of other exceptions so we can retry
+ }, this.hostContext.getExecutor())
+ // Stages 2, 4, 6, etc: if we already have a valid result, pass it along. Otherwise, make another attempt.
+ // Once we have a valid result there will be no more attempts or exceptions.
+ .thenComposeAsync((oldresult) ->
+ {
+ CompletableFuture<?> newresult = CompletableFuture.completedFuture(oldresult);
+ // null is the marker the previous stage uses for "the last attempt failed".
+ if (oldresult == null) {
+ try {
+ newresult = lambda.call();
+ } catch (Exception e1) {
+ throw new CompletionException(e1);
+ }
+ }
+ return newresult;
+ }, this.hostContext.getExecutor());
+ }
+ // Stage final: trace the exception with the final message, or pass along the valid result.
+ retryChain = retryChain.handleAsync((r, e) ->
+ {
+ if (e != null) {
+ if (e instanceof FinalException) {
+ throw (FinalException) e;
+ } else {
+ TRACE_LOGGER.warn(this.hostContext.withHost(finalFailureMessage));
+ throw new FinalException(LoggingUtils.wrapExceptionWithMessage(LoggingUtils.unwrapException(e, null), finalFailureMessage, action));
+ }
+ }
+ // Only reachable on success: both failure branches above throw.
+ return r;
+ }, this.hostContext.getExecutor());
+
+ return retryChain;
+ }
+
+ // Runs one lease scan and, unless the manager is shutting down, schedules the next one.
+ // The first scan (isFirst == true) is followed after the startup scan delay. Later scans are followed
+ // after the fast interval when the scan reported stealing a lease, and after the slow interval otherwise.
+ // Return Void so it can be called from a lambda; the value is always null.
+ private Void scan(boolean isFirst) {
+ TRACE_LOGGER.debug(this.hostContext.withHost("Starting lease scan"));
+ long start = System.currentTimeMillis();
+
+ (new PartitionScanner(this.hostContext, (lease) -> this.pumpManager.addPump(lease), this)).scan(isFirst)
+ .whenCompleteAsync((didSteal, e) ->
+ {
+ TRACE_LOGGER.debug(this.hostContext.withHost("Scanning took " + (System.currentTimeMillis() - start)));
+ if (e != null) {
+ // Trace the failure here because nothing else observes this stage's outcome.
+ TRACE_LOGGER.warn(this.hostContext.withHost("Lease scan failed"), LoggingUtils.unwrapException(e, null));
+ }
+
+ onPartitionCheckCompleteTestHook();
+
+ // Schedule the next scan unless we are shutting down.
+ if (!this.getIsClosingOrClosed()) {
+ // didSteal is null when the scan failed. Unboxing it would throw inside this stage, and since the
+ // stage's result is not observed, scanning would stop without a trace. Treat null as "did not steal".
+ int seconds = Boolean.TRUE.equals(didSteal) ? this.hostContext.getPartitionManagerOptions().getFastScanIntervalInSeconds() :
+ this.hostContext.getPartitionManagerOptions().getSlowScanIntervalInSeconds();
+ if (isFirst) {
+ seconds = this.hostContext.getPartitionManagerOptions().getStartupScanDelayInSeconds();
+ }
+ synchronized (this.scanFutureSynchronizer) {
+ this.scanFuture = this.hostContext.getExecutor().schedule(() -> scan(false), seconds, TimeUnit.SECONDS);
+ }
+ TRACE_LOGGER.debug(this.hostContext.withHost("Scheduling lease scanner in " + seconds));
+ } else {
+ TRACE_LOGGER.debug(this.hostContext.withHost("Not scheduling lease scanner due to shutdown"));
+ }
+ }, this.hostContext.getExecutor());
+
+ return null;
+ }
+
+ // Exception wrapper that buildRetries() uses to indicate that a fatal error has occurred. The chain
+ // built by buildRetries() normally swallows exceptions via odd-numbered stages so that the retries in
+ // even-numbered stages will execute. If multiple chains are concatenated, FinalException short-circuits
+ // the exceptional swallowing and allows fatal errors in earlier chains to be propagated all the way to the end.
+ //
+ // Declared static so that instances, which travel through future chains and are Serializable by
+ // inheritance, do not hold a hidden reference to the enclosing PartitionManager.
+ static class FinalException extends CompletionException {
+ private static final long serialVersionUID = -4600271981700687166L;
+
+ // Wraps the given CompletionException as the cause.
+ FinalException(CompletionException e) {
+ super(e);
+ }
+
+ // Returns the CompletionException passed to the constructor; the cast is safe because the constructor accepts no other type.
+ CompletionException getInner() {
+ return (CompletionException) this.getCause();
+ }
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManagerOptions.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManagerOptions.java
new file mode 100644
index 0000000000000..ac20a6d61b396
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionManagerOptions.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+/**
+ * Tunable settings for the partition manager inside the event processor host.
+ * They live in their own class because many of them also affect ILeaseManager and
+ * ICheckpointManager implementations, and an implementation may need to subclass this
+ * type to provide different options or defaults.
+ */
+public class PartitionManagerOptions {
+ /**
+ * Default number of seconds after which a partition lease expires unless renewed.
+ */
+ public static final int DefaultLeaseDurationInSeconds = 30;
+
+ /**
+ * Default number of seconds between lease renewals.
+ */
+ public static final int DefaultLeaseRenewIntervalInSeconds = 10;
+
+ /**
+ * Default timeout, in seconds, for checkpoint operations.
+ */
+ public static final int DefaultCheckpointTimeoutInSeconds = 120;
+
+ /**
+ * Default delay, in seconds, between the first scan for available partitions and the second.
+ */
+ public static final int DefaultStartupScanDelayInSeconds = 30;
+
+ /**
+ * Default interval, in seconds, before the next scan when lease stealing occurred.
+ */
+ public static final int DefaultFastScanIntervalInSeconds = 3;
+
+ /**
+ * Default interval, in seconds, before the next scan when no lease stealing occurred.
+ */
+ public static final int DefaultSlowScanIntervalInSeconds = 5;
+
+ protected int leaseDurationInSeconds = DefaultLeaseDurationInSeconds;
+ protected int leaseRenewIntervalInSeconds = DefaultLeaseRenewIntervalInSeconds;
+ protected int checkpointTimeoutInSeconds = DefaultCheckpointTimeoutInSeconds;
+
+ protected int startupScanDelayInSeconds = DefaultStartupScanDelayInSeconds;
+ protected int fastScanIntervalInSeconds = DefaultFastScanIntervalInSeconds;
+ protected int slowScanIntervalInSeconds = DefaultSlowScanIntervalInSeconds;
+
+ /**
+ * Creates an instance whose members all hold the static defaults.
+ */
+ public PartitionManagerOptions() {
+ }
+
+ /**
+ * Returns value when it is greater than 0; otherwise throws IllegalArgumentException with the given message.
+ */
+ private static int requirePositive(int value, String message) {
+ if (value <= 0) {
+ throw new IllegalArgumentException(message);
+ }
+ return value;
+ }
+
+ /**
+ * Gets the duration after which a partition lease will expire unless renewed.
+ * Defaults to DefaultLeaseDurationInSeconds.
+ *
+ * @return lease duration in seconds
+ */
+ public int getLeaseDurationInSeconds() {
+ return this.leaseDurationInSeconds;
+ }
+
+ /**
+ * Sets the duration after which a partition lease will expire unless renewed.
+ * Must be greater than 0 and should not be less than the renew interval; only the
+ * first condition is checked here. When using the default, Azure Storage-based
+ * ILeaseManager, the duration cannot be greater than 60.
+ *
+ * @param duration new value for lease duration, in seconds
+ * @throws IllegalArgumentException if duration is not greater than 0
+ */
+ public void setLeaseDurationInSeconds(int duration) {
+ this.leaseDurationInSeconds = requirePositive(duration, "Lease duration must be greater than 0");
+ }
+
+ /**
+ * Gets the duration between lease renewals. Defaults to DefaultLeaseRenewIntervalInSeconds.
+ *
+ * @return how often leases are renewed, in seconds
+ */
+ public int getLeaseRenewIntervalInSeconds() {
+ return this.leaseRenewIntervalInSeconds;
+ }
+
+ /**
+ * Sets the duration between lease renewals. Must be greater than 0 and not greater than
+ * the current lease duration.
+ *
+ * @param interval new value for how often leases are renewed, in seconds
+ * @throws IllegalArgumentException if interval is not greater than 0 or exceeds the current lease duration
+ */
+ public void setLeaseRenewIntervalInSeconds(int interval) {
+ final boolean withinRange = (interval > 0) && (interval <= this.leaseDurationInSeconds);
+ if (!withinRange) {
+ throw new IllegalArgumentException("Lease renew interval must be greater than 0 and not more than lease duration");
+ }
+ this.leaseRenewIntervalInSeconds = interval;
+ }
+
+ /**
+ * Gets the timeout for checkpoint operations. Defaults to DefaultCheckpointTimeoutInSeconds.
+ *
+ * @return timeout for checkpoint operations, in seconds
+ */
+ public int getCheckpointTimeoutInSeconds() {
+ return this.checkpointTimeoutInSeconds;
+ }
+
+ /**
+ * Sets the timeout for checkpoint operations. Must be greater than 0.
+ *
+ * @param timeout new value for checkpoint timeout, in seconds
+ * @throws IllegalArgumentException if timeout is not greater than 0
+ */
+ public void setCheckpointTimeoutInSeconds(int timeout) {
+ this.checkpointTimeoutInSeconds = requirePositive(timeout, "Checkpoint timeout must be greater than 0");
+ }
+
+ /**
+ * Gets the delay time between the first scan for available partitions and the second. This is
+ * part of a startup optimization which allows individual hosts to become visible to other
+ * hosts, and thereby get a more accurate count of the number of hosts in the system, before
+ * they try to estimate how many partitions they should own.
+ *
+ * Defaults to DefaultStartupScanDelayInSeconds.
+ *
+ * @return delay time in seconds
+ */
+ public int getStartupScanDelayInSeconds() {
+ return this.startupScanDelayInSeconds;
+ }
+
+ /**
+ * Sets the delay time in seconds between the first scan and the second. Must be greater than 0.
+ *
+ * @param delay new delay time in seconds
+ * @throws IllegalArgumentException if delay is not greater than 0
+ */
+ public void setStartupScanDelayInSeconds(int delay) {
+ this.startupScanDelayInSeconds = requirePositive(delay, "Startup scan delay must be greater than 0");
+ }
+
+ /**
+ * There are two possible interval times between scans for available partitions, fast and slow.
+ * The fast (short) interval is used after a scan in which lease stealing has occurred, to
+ * promote quicker rebalancing.
+ *
+ * Defaults to DefaultFastScanIntervalInSeconds.
+ *
+ * @return interval time in seconds
+ */
+ public int getFastScanIntervalInSeconds() {
+ return this.fastScanIntervalInSeconds;
+ }
+
+ /**
+ * Sets the time for the fast interval. Must be greater than 0.
+ *
+ * @param interval new fast interval in seconds
+ * @throws IllegalArgumentException if interval is not greater than 0
+ */
+ public void setFastScanIntervalInSeconds(int interval) {
+ this.fastScanIntervalInSeconds = requirePositive(interval, "Fast scan interval must be greater than 0");
+ }
+
+ /**
+ * The slow (long) interval is used after a scan in which lease stealing did not occur, to
+ * reduce unnecessary scanning when the system is in steady state.
+ *
+ * Defaults to DefaultSlowScanIntervalInSeconds.
+ *
+ * @return interval time in seconds
+ */
+ public int getSlowScanIntervalInSeconds() {
+ return this.slowScanIntervalInSeconds;
+ }
+
+ /**
+ * Sets the time for the slow interval. Must be greater than 0.
+ *
+ * @param interval new slow interval in seconds
+ * @throws IllegalArgumentException if interval is not greater than 0
+ */
+ public void setSlowScanIntervalInSeconds(int interval) {
+ this.slowScanIntervalInSeconds = requirePositive(interval, "Slow scan interval must be greater than 0");
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionPump.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionPump.java
new file mode 100644
index 0000000000000..fcdbe9abd8537
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionPump.java
@@ -0,0 +1,528 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import com.microsoft.azure.eventhubs.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+
+class PartitionPump extends Closable implements PartitionReceiveHandler {
+ private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(PartitionPump.class);
+ protected final HostContext hostContext;
+ protected final CompleteLease lease; // protected for testability
+ // Completing this future starts the shutdown chain built in the constructor.
+ final private CompletableFuture shutdownTriggerFuture;
+ // Last stage of the shutdown chain; this is the future returned by startPump().
+ final private CompletableFuture shutdownFinishedFuture;
+ // Lock held while calling processor.onClose in cleanUpAll.
+ private final Object processingSynchronizer;
+ // Invoked with this pump's partition id at the start of shutdown.
+ private final Consumer pumpManagerCallback;
+ // Assigned by openClients() when client creation succeeds.
+ private EventHubClient eventHubClient = null;
+ // Assigned by openClients() when receiver creation succeeds.
+ private PartitionReceiver partitionReceiver = null;
+ private CloseReason shutdownReason;
+ // Refers to the client or receiver creation currently in flight and is null otherwise; it allows canceling
+ // startup if it gets stuck. The wildcard is needed because it holds futures of different result types.
+ private volatile CompletableFuture<?> internalOperationFuture = null;
+ // Assigned by openProcessor(); reset to null there if creating or opening the processor fails.
+ private IEventProcessor processor = null;
+ private PartitionContext partitionContext = null;
+ // Handle of the most recently scheduled lease renewal, assigned by scheduleLeaseRenewer().
+ private ScheduledFuture<?> leaseRenewerFuture = null;
+
+ // Creates a pump for the partition named by the given lease and builds its shutdown chain.
+ // parent is handed to the Closable constructor; pumpManagerCallback is invoked with the partition id
+ // when shutdown begins. Nothing is opened here -- startPump() does that.
+ PartitionPump(HostContext hostContext, CompleteLease lease, Closable parent, Consumer pumpManagerCallback) {
+ super(parent);
+
+ this.hostContext = hostContext;
+ this.lease = lease;
+ this.pumpManagerCallback = pumpManagerCallback;
+ this.processingSynchronizer = new Object();
+
+ this.partitionContext = new PartitionContext(this.hostContext, this.lease.getPartitionId());
+ this.partitionContext.setLease(this.lease);
+
+ // Set up the shutdown futures. The shutdown process can be triggered just by completing this.shutdownTriggerFuture.
+ this.shutdownTriggerFuture = new CompletableFuture();
+ this.shutdownFinishedFuture = this.shutdownTriggerFuture
+ // Runs however the trigger completed: notify the pump manager, then cancel pending operations.
+ .handleAsync((r, e) -> {
+ this.pumpManagerCallback.accept(this.lease.getPartitionId());
+ return cancelPendingOperations();
+ }, this.hostContext.getExecutor())
+ // this.shutdownReason is read when this stage runs, not when the chain is built.
+ .thenComposeAsync((empty) -> cleanUpAll(this.shutdownReason), this.hostContext.getExecutor())
+ .thenComposeAsync((empty) -> releaseLeaseOnShutdown(), this.hostContext.getExecutor())
+ // Runs whether the stages above succeeded or failed.
+ .whenCompleteAsync((empty, e) -> {
+ setClosed();
+ }, this.hostContext.getExecutor());
+ }
+
+ // startPump returns a CompletableFuture that stays uncompleted for as long as the pump is running.
+ // It completes exceptionally if startup fails or an error occurs while running,
+ // and completes normally after a clean shutdown caused by an unregister call.
+ CompletableFuture startPump() {
+ // The slow startup steps run asynchronously, in order: open the processor, open the clients,
+ // then schedule lease renewal. The last stage triggers cleanup if any of them failed.
+ CompletableFuture.runAsync(this::openProcessor, this.hostContext.getExecutor())
+ .thenComposeAsync((unused) -> openClientsRetryWrapper(), this.hostContext.getExecutor())
+ .thenRunAsync(this::scheduleLeaseRenewer, this.hostContext.getExecutor())
+ .whenCompleteAsync((unused, startupFailure) ->
+ {
+ // A startup failure triggers shutdown so that whatever was opened gets cleaned up.
+ if (startupFailure != null) {
+ internalShutdown(CloseReason.Shutdown, startupFailure);
+ }
+ }, this.hostContext.getExecutor());
+
+ return this.shutdownFinishedFuture;
+ }
+
+ // Creates the event processor through the host's factory and calls its onOpen.
+ // If either step throws, the processor reference is cleared, the failure is traced and passed to
+ // notifyOfException with the step that failed, and a CompletionException wrapping it is thrown.
+ private void openProcessor() {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "Creating and opening event processor instance"));
+
+ // Tracks which step is in progress so the catch block can name the one that failed.
+ String currentStep = EventProcessorHostActionStrings.CREATING_EVENT_PROCESSOR;
+ try {
+ this.processor = this.hostContext.getEventProcessorFactory().createEventProcessor(this.partitionContext);
+ currentStep = EventProcessorHostActionStrings.OPENING_EVENT_PROCESSOR;
+ this.processor.onOpen(this.partitionContext);
+ } catch (Exception failure) {
+ // Nothing more can be done with a processor that won't create or open, so drop the reference
+ // to keep later stages from operating on it, then pass the failure on.
+ this.processor = null;
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext, "Failed " + currentStep), failure);
+ this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), failure, currentStep, this.lease.getPartitionId());
+ throw new CompletionException(failure);
+ }
+ }
+
+ // Calls openClients() up to five times in total, stopping at the first success, and stopping early
+ // without further attempts when a failure unwraps to ReceiverDisconnectedException.
+ // On success the final stage registers this pump as the receiver's receive handler and the returned
+ // future completes with null. On failure the final stage reports the error to the processor's onError
+ // and the returned future completes exceptionally.
+ private CompletableFuture openClientsRetryWrapper() {
+ // Stage 0: first attempt
+ CompletableFuture retryResult = openClients();
+
+ // Starts at 1 because stage 0 already made the first attempt.
+ // NOTE(review): the attempt count is a literal here, while PartitionManager keeps its own in a retryMax field.
+ for (int i = 1; i < 5; i++) {
+ retryResult = retryResult
+ // Stages 1, 3, 5, etc: trace errors but stop exception propagation in order to keep going
+ // UNLESS it's ReceiverDisconnectedException.
+ .handleAsync((r, e) ->
+ {
+ if (e != null) {
+ // NOTE(review): this cast assumes unwrapException never yields a non-Exception Throwable
+ // (e.g. an Error); PartitionManager.stopPartitions guards with instanceof instead -- confirm.
+ Exception notifyWith = (Exception) LoggingUtils.unwrapException(e, null);
+ if (notifyWith instanceof ReceiverDisconnectedException) {
+ // TODO Assuming this is due to a receiver with a higher epoch.
+ // Is there a way to be sure without checking the exception text?
+ // DO NOT trace here because then we could get multiple traces for the same exception.
+ // If it's a bad epoch, then retrying isn't going to help.
+ // Rethrow to keep propagating error to the end and prevent any more attempts.
+ throw new CompletionException(notifyWith);
+ } else {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Failure creating client or receiver, retrying"), e);
+ }
+ }
+ // If we have a valid result, pass it along to prevent further attempts.
+ // false is the marker that tells the next stage to try again.
+ return (e == null) ? r : false;
+ }, this.hostContext.getExecutor())
+ // Stages 2, 4, 6, etc: make another attempt if needed.
+ .thenComposeAsync((done) ->
+ {
+ return done ? CompletableFuture.completedFuture(done) : openClients();
+ }, this.hostContext.getExecutor());
+ }
+ // Stage final: on success, hook up the user's event handler to start receiving events. On error,
+ // trace exceptions from the final attempt, or ReceiverDisconnectedException.
+ return retryResult.handleAsync((r, e) ->
+ {
+ if (e == null) {
+ // IEventProcessor.onOpen is called from the base PartitionPump and must have returned in order for execution to reach here,
+ // meaning it is safe to set the handler and start calling IEventProcessor.onEvents.
+ this.partitionReceiver.setReceiveHandler(this, this.hostContext.getEventProcessorOptions().getInvokeProcessorAfterReceiveTimeout());
+ } else {
+ // NOTE(review): same unchecked cast as in the retry stages above.
+ Exception notifyWith = (Exception) LoggingUtils.unwrapException(e, null);
+ if (notifyWith instanceof ReceiverDisconnectedException) {
+ // TODO Assuming this is due to a receiver with a higher epoch.
+ // Is there a way to be sure without checking the exception text?
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Receiver disconnected on create, bad epoch?"), notifyWith);
+ } else {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Failure creating client or receiver, out of retries"), e);
+ }
+
+ // IEventProcessor.onOpen is called from the base PartitionPump and must have returned in order for execution to reach here,
+ // so we can report this error to it instead of the general error handler.
+ this.processor.onError(this.partitionContext, new ExceptionWithAction(notifyWith, EventProcessorHostActionStrings.CREATING_EVENT_HUB_CLIENT));
+
+ // Rethrow so caller will see failure
+ throw LoggingUtils.wrapException(notifyWith, EventProcessorHostActionStrings.CREATING_EVENT_HUB_CLIENT);
+ }
+ return null;
+ }, this.hostContext.getExecutor());
+ }
+
+ // Schedules one run of leaseRenewer() after the configured lease renew interval and remembers the
+ // resulting handle in this.leaseRenewerFuture. Does nothing when the pump is closing or closed.
+ protected void scheduleLeaseRenewer() {
+ if (getIsClosingOrClosed()) {
+ return;
+ }
+ final int renewDelaySeconds = this.hostContext.getPartitionManagerOptions().getLeaseRenewIntervalInSeconds();
+ this.leaseRenewerFuture = this.hostContext.getExecutor().schedule(() -> leaseRenewer(), renewDelaySeconds, TimeUnit.SECONDS);
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(this.lease, "scheduling leaseRenewer in " + renewDelaySeconds));
+ }
+
+ // Makes one attempt to create the EventHubClient and an epoch receiver for this partition.
+ // The returned future completes with true once the receiver exists and its receive timeout has been set,
+ // and completes exceptionally if any step fails. While the client or the receiver is being created,
+ // this.internalOperationFuture refers to the pending operation so that a stuck startup can be canceled.
+ private CompletableFuture<Boolean> openClients() {
+ // Create new client
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "Opening EH client"));
+
+ CompletableFuture<EventHubClient> startOpeningFuture = null;
+ try {
+ startOpeningFuture = EventHubClient.create(this.hostContext.getEventHubConnectionString(),
+ this.hostContext.getRetryPolicy(), this.hostContext.getExecutor());
+ } catch (EventHubException | IOException e2) {
+ // Marking startOpeningFuture as completed exceptionally will cause all the
+ // following stages to fall through except stage 1 which will report the error.
+ startOpeningFuture = new CompletableFuture<EventHubClient>();
+ startOpeningFuture.completeExceptionally(e2);
+ }
+ this.internalOperationFuture = startOpeningFuture;
+
+ // Stage 0: get EventHubClient
+ return startOpeningFuture
+ // Stage 1: save EventHubClient on success, trace on error
+ .whenCompleteAsync((ehclient, e) ->
+ {
+ if ((ehclient != null) && (e == null)) {
+ this.eventHubClient = ehclient;
+ } else {
+ TRACE_LOGGER.error(this.hostContext.withHostAndPartition(this.partitionContext, "EventHubClient creation failed"), e);
+ }
+ // this.internalOperationFuture allows canceling startup if it gets stuck. Null out now that EventHubClient creation has completed.
+ this.internalOperationFuture = null;
+ }, this.hostContext.getExecutor())
+ // Stage 2: get initial offset for receiver
+ .thenComposeAsync((empty) -> this.partitionContext.getInitialOffset(), this.hostContext.getExecutor())
+ // Stage 3: set up other receiver options, create receiver if initial offset is valid
+ .thenComposeAsync((startAt) ->
+ {
+ long epoch = this.lease.getEpoch();
+
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Opening EH receiver with epoch " + epoch + " at location " + startAt));
+
+ CompletableFuture<PartitionReceiver> receiverFuture = null;
+
+ try {
+ ReceiverOptions options = new ReceiverOptions();
+ options.setReceiverRuntimeMetricEnabled(this.hostContext.getEventProcessorOptions().getReceiverRuntimeMetricEnabled());
+ options.setPrefetchCount(this.hostContext.getEventProcessorOptions().getPrefetchCount());
+
+ receiverFuture = this.eventHubClient.createEpochReceiver(this.partitionContext.getConsumerGroupName(),
+ this.partitionContext.getPartitionId(), startAt, epoch, options);
+ this.internalOperationFuture = receiverFuture;
+ } catch (EventHubException e) {
+ // createEpochReceiver threw synchronously: report it through a failed future so stage 4 still runs.
+ TRACE_LOGGER.error(this.hostContext.withHostAndPartition(this.partitionContext, "Opening EH receiver failed with an error "), e);
+ receiverFuture = new CompletableFuture<PartitionReceiver>();
+ receiverFuture.completeExceptionally(e);
+ }
+
+ return receiverFuture;
+ }, this.hostContext.getExecutor())
+ // Stage 4: save PartitionReceiver on success, trace on error
+ .whenCompleteAsync((receiver, e) ->
+ {
+ if ((receiver != null) && (e == null)) {
+ this.partitionReceiver = receiver;
+ } else if (this.eventHubClient != null) {
+ // A stage that follows thenComposeAsync receives the failure wrapped in CompletionException,
+ // so the wrapper has to be removed before the exception type can be tested.
+ if ((e != null) && (LoggingUtils.unwrapException(e, null) instanceof ReceiverDisconnectedException)) {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "PartitionReceiver disconnected during startup"));
+ } else {
+ TRACE_LOGGER.error(this.hostContext.withHostAndPartition(this.partitionContext, "PartitionReceiver creation failed"), e);
+ }
+ }
+ // else if this.eventHubClient is null then we failed in stage 0 and already traced in stage 1
+
+ // this.internalOperationFuture allows canceling startup if it gets stuck. Null out now that PartitionReceiver creation has completed.
+ this.internalOperationFuture = null;
+ }, this.hostContext.getExecutor())
+ // Stage 5: on success, set up the receiver
+ .thenApplyAsync((receiver) ->
+ {
+ this.partitionReceiver.setReceiveTimeout(this.hostContext.getEventProcessorOptions().getReceiveTimeOut());
+
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext,
+ "EH client and receiver creation finished"));
+
+ return true;
+ }, this.hostContext.getExecutor());
+ }
+
+ private CompletableFuture cleanUpAll(CloseReason reason) // swallows all exceptions
+ {
+ return cleanUpClients()
+ .thenRunAsync(() ->
+ {
+ if (this.processor != null) {
+ try {
+ synchronized (this.processingSynchronizer) {
+ // When we take the lock, any existing onEvents call has finished.
+ // Because the client has been closed, there will not be any more
+ // calls to onEvents in the future. Therefore we can safely call onClose.
+ this.processor.onClose(this.partitionContext, reason);
+ }
+ } catch (Exception e) {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Failure closing processor"), e);
+ // If closing the processor has failed, the state of the processor is suspect.
+ // Report the failure to the general error handler instead.
+ this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), e, EventProcessorHostActionStrings.CLOSING_EVENT_PROCESSOR,
+ this.lease.getPartitionId());
+ }
+ }
+ }, this.hostContext.getExecutor());
+ }
+
+ /**
+  * Tears down the partition receiver and the EventHubClient as one sequential async chain:
+  * clear the receive handler, close the receiver, then close the client. Each of the three steps
+  * is followed by a handleAsync stage that traces any failure and returns null, so a failure in
+  * one step does not prevent the following steps from running.
+  *
+  * @return the future for the final stage of the chain
+  */
+ private CompletableFuture cleanUpClients() // swallows all exceptions
+ {
+ CompletableFuture cleanupFuture = null;
+ if (this.partitionReceiver != null) {
+ // Disconnect the processor from the receiver we're about to close.
+ // Fortunately this is idempotent -- setting the handler to null when it's already been
+ // nulled by code elsewhere is harmless!
+ // Setting to null also waits for the in-progress calls to complete
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "Setting receive handler to null"));
+ cleanupFuture = this.partitionReceiver.setReceiveHandler(null);
+ } else {
+ // No receiver was ever created (or it was already cleaned up); start the chain from a completed future.
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(this.partitionContext, "partitionReceiver is null in cleanup"));
+ cleanupFuture = CompletableFuture.completedFuture(null);
+ }
+ // Step 1 result handling: trace a failure from clearing the handler, then continue.
+ cleanupFuture = cleanupFuture.handleAsync((empty, e) ->
+ {
+ if (e != null) {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Got exception when ReceiveHandler is set to null."), LoggingUtils.unwrapException(e, null));
+ }
+ return null; // stop propagation of exceptions
+ }, this.hostContext.getExecutor())
+ // Step 2: detach the receiver from this pump (field set to null) and hand it to the next stage for closing.
+ .thenApplyAsync((empty) ->
+ {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "Closing EH receiver"));
+ PartitionReceiver partitionReceiverTemp = this.partitionReceiver;
+ this.partitionReceiver = null;
+ return partitionReceiverTemp;
+ }, this.hostContext.getExecutor())
+ .thenComposeAsync((partitionReceiverTemp) ->
+ {
+ return (partitionReceiverTemp != null) ? partitionReceiverTemp.close() : CompletableFuture.completedFuture(null);
+ }, this.hostContext.getExecutor())
+ .handleAsync((empty, e) ->
+ {
+ if (e != null) {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+ "Closing EH receiver failed."), LoggingUtils.unwrapException(e, null));
+ }
+ return null; // stop propagation of exceptions
+ }, this.hostContext.getExecutor())
+ // Step 3: same detach-then-close pattern for the EventHubClient.
+ .thenApplyAsync((empty) ->
+ {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext, "Closing EH client"));
+ final EventHubClient eventHubClientTemp = this.eventHubClient;
+ this.eventHubClient = null;
+ if (eventHubClientTemp == null) {
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(this.partitionContext,
+ "eventHubClient is null in cleanup"));
+ }
+ return eventHubClientTemp;
+ }, this.hostContext.getExecutor())
+ .thenComposeAsync((eventHubClientTemp) ->
+ {
+ return (eventHubClientTemp != null) ? eventHubClientTemp.close() : CompletableFuture.completedFuture(null);
+ }, this.hostContext.getExecutor())
+ .handleAsync((empty, e) ->
+ {
+ if (e != null) {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext, "Closing EH client failed."),
+ LoggingUtils.unwrapException(e, null));
+ }
+ return null; // stop propagation of exceptions
+ }, this.hostContext.getExecutor());
+
+ return cleanupFuture;
+ }
+
+ /**
+  * Cancels whatever long-running work this pump may still have outstanding: the in-progress
+  * startup operation (this.internalOperationFuture) and the scheduled lease renewer
+  * (this.leaseRenewerFuture). Both fields are copied to locals first so the null check and the
+  * cancel call act on the same reference even if the field is nulled concurrently.
+  *
+  * @return always null
+  */
+ protected Void cancelPendingOperations() {
+     // If an open operation is stuck, this lets us shut down anyway.
+     CompletableFuture<?> captured = this.internalOperationFuture;
+     if (captured != null) {
+         captured.cancel(true);
+     }
+
+     ScheduledFuture<?> capturedLeaseRenewer = this.leaseRenewerFuture;
+     if (capturedLeaseRenewer != null) {
+         capturedLeaseRenewer.cancel(true);
+     }
+     return null;
+ }
+
+ /**
+  * Releases this pump's lease during shutdown unless the shutdown reason is CloseReason.LeaseLost.
+  * A failure to release is traced and then discarded.
+  *
+  * @return an already-completed future when the lease was lost, otherwise the future of the
+  *         release attempt with its exception handled
+  */
+ private CompletableFuture releaseLeaseOnShutdown() // swallows all exceptions
+ {
+     if (this.shutdownReason == CloseReason.LeaseLost) {
+         // We already lost the lease: releasing is unnecessary and would fail if we tried.
+         return CompletableFuture.completedFuture(null);
+     }
+
+     // The pump is dead, so give the lease back. Errors are not important here: the lease renewer
+     // has been cancelled, so in the worst case the lease simply expires.
+     return PartitionPump.this.hostContext.getLeaseManager().releaseLease(this.lease)
+         .handleAsync((ignored, failure) ->
+         {
+             if (failure != null) {
+                 TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+                     "Failure releasing lease on pump shutdown"), LoggingUtils.unwrapException(failure, null));
+             }
+             return null; // stop propagation of exceptions
+         }, this.hostContext.getExecutor());
+ }
+
+ /**
+  * Marks the pump as closing, records the shutdown reason, and completes this.shutdownTriggerFuture.
+  *
+  * @param reason why the pump is shutting down; stored in this.shutdownReason
+  * @param e      error that caused the shutdown, or null; when non-null the trigger future is
+  *               completed exceptionally with it
+  */
+ protected void internalShutdown(CloseReason reason, Throwable e) {
+     setClosing();
+     this.shutdownReason = reason;
+
+     if (e != null) {
+         this.shutdownTriggerFuture.completeExceptionally(e);
+         return;
+     }
+     this.shutdownTriggerFuture.complete(null);
+ }
+
+ CompletableFuture shutdown(CloseReason reason) {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext,
+ "pump shutdown for reason " + reason.toString()));
+ internalShutdown(reason, null);
+ return this.shutdownFinishedFuture;
+ }
+
+ private void leaseRenewer() {
+ TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(this.lease, "leaseRenewer()"));
+
+ // Theoretically, if the future is cancelled then this method should never fire, but
+ // there's no harm in being sure.
+ if (this.leaseRenewerFuture.isCancelled()) {
+ return;
+ }
+ if (getIsClosingOrClosed()) {
+ return;
+ }
+
+ // Stage 0: renew the lease
+ this.hostContext.getLeaseManager().renewLease(this.lease)
+ // Stage 1: check result of renewing
+ .thenApplyAsync((renewed) ->
+ {
+ Boolean scheduleNext = true;
+ if (!renewed) {
+ // False return from renewLease means that lease was lost.
+ // Start pump shutdown process and do not schedule another call to leaseRenewer.
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.lease, "Lease lost, shutting down pump"));
+ internalShutdown(CloseReason.LeaseLost, null);
+ scheduleNext = false;
+ }
+ return scheduleNext;
+ }, this.hostContext.getExecutor())
+ // Stage 2: RUN REGARDLESS OF EXCEPTIONS -- trace exceptions, schedule next iteration
+ .whenCompleteAsync((scheduleNext, e) ->
+ {
+ if (e != null) {
+ // Failure renewing lease due to storage exception or whatever.
+ // Trace error and leave scheduleNext as true to schedule another try.
+ Exception notifyWith = (Exception) LoggingUtils.unwrapException(e, null);
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.lease, "Transient failure renewing lease"), notifyWith);
+ // Notify the general error handler rather than calling this.processor.onError so we can provide context (RENEWING_LEASE)
+ this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), notifyWith, EventProcessorHostActionStrings.RENEWING_LEASE,
+ this.lease.getPartitionId());
+ }
+
+ if ((scheduleNext != null) && scheduleNext.booleanValue() && !this.leaseRenewerFuture.isCancelled() && !getIsClosingOrClosed()) {
+ scheduleLeaseRenewer();
+ }
+ }, this.hostContext.getExecutor());
+ }
+
+ /**
+  * @return the max batch size configured in the host's event processor options
+  */
+ @Override
+ public int getMaxEventCount() {
+     final int configuredBatchSize = this.hostContext.getEventProcessorOptions().getMaxBatchSize();
+     return configuredBatchSize;
+ }
+
+ /**
+  * Receive callback: hands one batch of events to this.processor.onEvents.
+  * A null batch is replaced by an empty list before the processor is called. The last event of the
+  * batch, if any, is recorded in the partition context. Exceptions thrown by onEvents are traced
+  * and not rethrown.
+  *
+  * @param events the received batch; may be null when there are no events
+  */
+ @Override
+ public void onReceive(Iterable events) {
+     if (this.hostContext.getEventProcessorOptions().getReceiverRuntimeMetricEnabled()) {
+         this.partitionContext.setRuntimeInformation(this.partitionReceiver.getRuntimeInformation());
+     }
+
+     // This runs on the thread the Java EH client uses for its pump. There is one pump per
+     // EventHubClient, and every PartitionPump creates its own EventHubClient, so calling onEvents
+     // on this thread does no harm. A slow onEvents only delays the next batch for this same
+     // processor; the pump gains nothing by running faster than onEvents.
+
+     // The underlying client passes null when there are no events, whereas the IEventProcessor
+     // contract expects an empty iterable in that case (when invoking the processor after a
+     // receive timeout is turned on).
+     final Iterable batch = (events != null) ? events : new ArrayList();
+
+     // Remember the final event so the argument-less PartitionContext.checkpoint() overload
+     // knows the latest offset and sequence number.
+     EventData newest = null;
+     for (Iterator cursor = batch.iterator(); cursor.hasNext(); ) {
+         newest = cursor.next();
+     }
+     if (newest != null) {
+         this.partitionContext.setOffsetAndSequenceNumber(newest);
+     }
+
+     try {
+         // Calls into the processor are serialized. The handler is only installed after onOpen has
+         // returned, so onEvents never overlaps onOpen. Taking this.processingSynchronizer keeps
+         // onClose from running while an onEvents call is still in progress.
+         synchronized (this.processingSynchronizer) {
+             this.processor.onEvents(this.partitionContext, batch);
+         }
+     } catch (Exception e) {
+         // TODO -- do we pass errors from IEventProcessor.onEvents to IEventProcessor.onError?
+         // Either that is pointless (user code that throws already knows about it) or it is a
+         // convenient way of centralizing error handling. Until that is decided, just trace it.
+         TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext,
+             "Got exception from onEvents"), e);
+     }
+ }
+
+ @Override
+ public void onError(Throwable error) {
+ if (error == null) {
+ error = new Throwable("No error info supplied by EventHub client");
+ }
+ if (error instanceof ReceiverDisconnectedException) {
+ TRACE_LOGGER.info(this.hostContext.withHostAndPartition(this.partitionContext,
+ "EventHub client disconnected, probably another host took the partition"));
+ } else {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext, "EventHub client error: " + error.toString()));
+ if (error instanceof Exception) {
+ TRACE_LOGGER.warn(this.hostContext.withHostAndPartition(this.partitionContext, "EventHub client error continued"), (Exception) error);
+ }
+ }
+
+ // It is vital to perform the rest of cleanup in a separate thread and not block this one. This thread is the client's
+ // receive pump thread, and blocking it means that the receive pump never completes its CompletableFuture, which in turn
+ // blocks other client calls that we would like to make during cleanup. Specifically, this issue was found when
+ // PartitionReceiver.setReceiveHandler(null).get() was called and never returned.
+ final Throwable capturedError = error;
+ CompletableFuture.runAsync(() -> PartitionPump.this.processor.onError(PartitionPump.this.partitionContext, capturedError), this.hostContext.getExecutor())
+ .thenRunAsync(() -> internalShutdown(CloseReason.Shutdown, capturedError), this.hostContext.getExecutor());
+ }
+}
diff --git a/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionScanner.java b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionScanner.java
new file mode 100644
index 0000000000000..4bd0a3386909c
--- /dev/null
+++ b/eventhubs/data-plane/azure-eventhubs-eph/src/main/java/com/microsoft/azure/eventprocessorhost/PartitionScanner.java
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) Microsoft. All rights reserved.
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
+ */
+
+package com.microsoft.azure.eventprocessorhost;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.*;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+
+class PartitionScanner extends Closable {
+ private static final Logger TRACE_LOGGER = LoggerFactory.getLogger(PartitionScanner.class);
+ // Shared random source; used to pick a random starting point on the first scan and a random big owner to steal from.
+ private static final Random randomizer = new Random();
+ private final HostContext hostContext;
+ // Callback invoked with each lease this scanner acquires.
+ private final Consumer addPump;
+
+ // Populated by getAllLeaseStates()
+ // May be replaced by a rotated copy in sortLeasesAndCalculateDesiredCount.
+ private List allLeaseStates = null;
+
+ // Values populated by sortLeasesAndCalculateDesiredCount
+ private int desiredCount;
+ private int unownedCount; // updated by acquireExpiredInChunksParallel
+ // Keyed by partition id.
+ final private ConcurrentHashMap leasesOwnedByOthers; // updated by acquireExpiredInChunksParallel
+
+ /**
+  * Creates a scanner with zeroed counters and an empty map of leases owned by other hosts.
+  *
+  * @param hostContext host-wide context (lease manager, executor, options, host name)
+  * @param addPump     callback invoked with each lease this scanner acquires
+  * @param parent      parent passed to the Closable superclass constructor
+  */
+ PartitionScanner(HostContext hostContext, Consumer addPump, Closable parent) {
+     super(parent);
+
+     // Scan bookkeeping starts out empty.
+     this.leasesOwnedByOthers = new ConcurrentHashMap();
+     this.unownedCount = 0;
+     this.desiredCount = 0;
+
+     // Collaborators supplied by the caller.
+     this.addPump = addPump;
+     this.hostContext = hostContext;
+ }
+
+ /**
+  * Runs one scan pass as an async chain: load all lease states, work out how many leases this
+  * host should own, acquire unowned leases up to that count, and, if still short, pick and steal
+  * leases from a host that owns more than the desired count.
+  *
+  * @param isFirst passed to sortLeasesAndCalculateDesiredCount, which then sets the desired count
+  *                to 1 and picks a random starting point
+  * @return future carrying the result of stealLeases; set to false when a non-ClosingException
+  *         failure occurred (that failure is traced and reported to the general error handler)
+  */
+ public CompletableFuture scan(boolean isFirst) {
+ return getAllLeaseStates()
+ // Stage 1: account for current ownership, then try to acquire unowned leases.
+ .thenComposeAsync((unused) -> {
+ throwIfClosingOrClosed("PartitionScanner is shutting down");
+ int ourLeasesCount = sortLeasesAndCalculateDesiredCount(isFirst);
+ return acquireExpiredInChunksParallel(0, this.desiredCount - ourLeasesCount);
+ }, this.hostContext.getExecutor())
+ // Stage 2: if acquisition left us short, choose leases to steal.
+ .thenApplyAsync((remainingNeeded) -> {
+ throwIfClosingOrClosed("PartitionScanner is shutting down");
+ ArrayList stealThese = new ArrayList();
+ if (remainingNeeded > 0) {
+ TRACE_LOGGER.debug(this.hostContext.withHost("Looking to steal: " + remainingNeeded));
+ stealThese = findLeasesToSteal(remainingNeeded);
+ }
+ return stealThese;
+ }, this.hostContext.getExecutor())
+ // Stage 3: steal the chosen leases (the list may be empty).
+ .thenComposeAsync((stealThese) -> {
+ throwIfClosingOrClosed("PartitionScanner is shutting down");
+ return stealLeases(stealThese);
+ }, this.hostContext.getExecutor())
+ // Stage 4: runs on success or failure. A ClosingException is passed over silently;
+ // in that case didSteal is returned as received from the failed chain.
+ .handleAsync((didSteal, e) -> {
+ if ((e != null) && !(e instanceof ClosingException)) {
+ StringBuilder outAction = new StringBuilder();
+ Exception notifyWith = (Exception) LoggingUtils.unwrapException(e, outAction);
+ TRACE_LOGGER.warn(this.hostContext.withHost("Exception scanning leases"), notifyWith);
+ this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), notifyWith, outAction.toString(),
+ ExceptionReceivedEventArgs.NO_ASSOCIATED_PARTITION);
+ didSteal = false;
+ }
+ return didSteal;
+ }, this.hostContext.getExecutor());
+ }
+
+ private CompletableFuture getAllLeaseStates() {
+ throwIfClosingOrClosed("PartitionScanner is shutting down");
+ return this.hostContext.getLeaseManager().getAllLeases()
+ .thenAcceptAsync((states) -> {
+ throwIfClosingOrClosed("PartitionScanner is shutting down");
+ this.allLeaseStates = states;
+ Collections.sort(this.allLeaseStates);
+ }, this.hostContext.getExecutor());
+ }
+
+ /**
+  * Non-blocking. Walks this.allLeaseStates to count the leases owned by this host, the unowned
+  * leases (this.unownedCount) and the distinct owners, records leases owned by other hosts in
+  * this.leasesOwnedByOthers, and derives this.desiredCount. It then rotates this.allLeaseStates
+  * so that this host starts examining leases at a host-specific position: a random one on the
+  * first scan, otherwise countPerHost times this host's ordinal among the sorted owner names.
+  *
+  * @param isFirst true on the first scan; desiredCount is then 1 and the starting point is random
+  * @return the number of leases currently owned by this host
+  */
+ private int sortLeasesAndCalculateDesiredCount(boolean isFirst) {
+     TRACE_LOGGER.debug(this.hostContext.withHost("Accounting input: allLeaseStates size is " + this.allLeaseStates.size()));
+
+     // This host is always counted as an owner, so hostCount below is never zero.
+     HashSet<String> uniqueOwners = new HashSet<String>();
+     uniqueOwners.add(this.hostContext.getHostName());
+     int ourLeasesCount = 0;
+     this.unownedCount = 0;
+     for (BaseLease info : this.allLeaseStates) {
+         boolean ownedByUs = info.getIsOwned() && info.getOwner() != null && (info.getOwner().compareTo(this.hostContext.getHostName()) == 0);
+         if (info.getIsOwned() && info.getOwner() != null) {
+             uniqueOwners.add(info.getOwner());
+         } else {
+             this.unownedCount++;
+         }
+         if (ownedByUs) {
+             ourLeasesCount++;
+         } else if (info.getIsOwned()) {
+             this.leasesOwnedByOthers.put(info.getPartitionId(), info);
+         }
+     }
+     int hostCount = uniqueOwners.size();
+     int countPerHost = this.allLeaseStates.size() / hostCount;
+     this.desiredCount = isFirst ? 1 : countPerHost;
+     if (!isFirst && (this.unownedCount > 0) && (this.unownedCount < hostCount) && ((this.allLeaseStates.size() % hostCount) != 0)) {
+         // Distribute leftovers.
+         this.desiredCount++;
+     }
+
+     ArrayList<String> sortedHosts = new ArrayList<String>(uniqueOwners);
+     Collections.sort(sortedHosts);
+     int hostOrdinal = -1;
+     int startingPoint = 0;
+     if (isFirst) {
+         // If the entire system is starting up, the list of hosts is probably not complete and we can't really
+         // compute a meaningful hostOrdinal. But we only want hostOrdinal to calculate startingPoint. Instead,
+         // just randomly select a startingPoint.
+         // Random.nextInt requires a positive bound, so with no leases at all keep the starting point at 0.
+         if (!this.allLeaseStates.isEmpty()) {
+             startingPoint = PartitionScanner.randomizer.nextInt(this.allLeaseStates.size());
+         }
+     } else {
+         for (hostOrdinal = 0; hostOrdinal < sortedHosts.size(); hostOrdinal++) {
+             if (sortedHosts.get(hostOrdinal).compareTo(this.hostContext.getHostName()) == 0) {
+                 break;
+             }
+         }
+         startingPoint = countPerHost * hostOrdinal;
+     }
+     // Rotate allLeaseStates
+     TRACE_LOGGER.debug(this.hostContext.withHost("Host ordinal: " + hostOrdinal + " Rotating leases to start at " + startingPoint));
+     if (startingPoint != 0) {
+         // Build a rotated copy rather than rotating in place.
+         ArrayList<BaseLease> rotatedList = new ArrayList<BaseLease>(this.allLeaseStates.size());
+         for (int j = 0; j < this.allLeaseStates.size(); j++) {
+             rotatedList.add(this.allLeaseStates.get((j + startingPoint) % this.allLeaseStates.size()));
+         }
+         this.allLeaseStates = rotatedList;
+     }
+
+     TRACE_LOGGER.debug(this.hostContext.withHost("Host count is " + hostCount + " Desired owned count is " + this.desiredCount));
+     TRACE_LOGGER.debug(this.hostContext.withHost("ourLeasesCount " + ourLeasesCount + " leasesOwnedByOthers " + this.leasesOwnedByOthers.size()
+         + " unowned " + unownedCount));
+
+     return ourLeasesCount;
+ }
+
+ /**
+  * Non-blocking. Collects the leases in this.allLeaseStates[startAt, endAt) that are not owned.
+  * The result is wrapped in an already-completed future purely as a convenience for the caller.
+  *
+  * @param startAt index of the first lease to examine (inclusive)
+  * @param endAt   index one past the last lease to examine (exclusive); may equal the list size
+  * @return completed future holding the unowned leases found in the range, in list order
+  */
+ private CompletableFuture<List<BaseLease>> findExpiredLeases(int startAt, int endAt) {
+     final ArrayList<BaseLease> expiredLeases = new ArrayList<BaseLease>();
+     TRACE_LOGGER.debug(this.hostContext.withHost("Finding expired leases from '" + this.allLeaseStates.get(startAt).getPartitionId() + "'[" + startAt + "] up to '" +
+         ((endAt < this.allLeaseStates.size()) ? this.allLeaseStates.get(endAt).getPartitionId() : "end") + "'[" + endAt + "]"));
+
+     for (BaseLease info : this.allLeaseStates.subList(startAt, endAt)) {
+         if (!info.getIsOwned()) {
+             expiredLeases.add(info);
+         }
+     }
+
+     TRACE_LOGGER.debug(this.hostContext.withHost("Found in range: " + expiredLeases.size()));
+     return CompletableFuture.completedFuture(expiredLeases);
+ }
+
+ /**
+  * Tries to acquire up to {@code needed} unowned leases, working through this.allLeaseStates in
+  * chunks. Each chunk covers at most {@code needed} entries starting at {@code startAt}; the
+  * unowned leases in the chunk are fetched and acquired in parallel, and the method then recurses
+  * on the next chunk with the count still needed. Each acquired lease is passed to this.addPump.
+  * A failure within a chunk is traced and reported, then swallowed so the next chunk still runs.
+  *
+  * NOTE(review): this.unownedCount is a plain int decremented from the parallel acquisition
+  * callbacks, and only when the partition was present in leasesOwnedByOthers -- confirm both are intended.
+  *
+  * @param startAt index in this.allLeaseStates at which this chunk begins
+  * @param needed  number of leases still wanted
+  * @return future holding the number of leases still needed after all chunks have been processed
+  */
+ private CompletableFuture<Integer> acquireExpiredInChunksParallel(int startAt, int needed) {
+     throwIfClosingOrClosed("PartitionScanner is shutting down");
+
+     CompletableFuture<Integer> resultFuture = CompletableFuture.completedFuture(needed);
+     if (startAt < this.allLeaseStates.size()) {
+         TRACE_LOGGER.debug(this.hostContext.withHost("Examining chunk at '" + this.allLeaseStates.get(startAt).getPartitionId() + "'[" + startAt + "] need " + needed));
+     } else {
+         TRACE_LOGGER.debug(this.hostContext.withHost("Examining chunk skipping, startAt is off end: " + startAt));
+     }
+
+     if ((needed > 0) && (this.unownedCount > 0) && (startAt < this.allLeaseStates.size())) {
+         // Shared by the parallel acquisition callbacks of this chunk.
+         final AtomicInteger runningNeeded = new AtomicInteger(needed);
+         final int endAt = Math.min(startAt + needed, this.allLeaseStates.size());
+
+         resultFuture = findExpiredLeases(startAt, endAt)
+             .thenComposeAsync((getThese) -> {
+                 throwIfClosingOrClosed("PartitionScanner is shutting down");
+                 CompletableFuture<Void> acquireFuture = CompletableFuture.completedFuture(null);
+                 if (getThese.size() > 0) {
+                     ArrayList<CompletableFuture<Void>> getFutures = new ArrayList<CompletableFuture<Void>>();
+                     for (BaseLease info : getThese) {
+                         throwIfClosingOrClosed("PartitionScanner is shutting down");
+                         // Carries the fetched lease from the get stage to the stage that handles the acquire result.
+                         final AcquisitionHolder holder = new AcquisitionHolder();
+                         CompletableFuture<Void> getOneFuture = this.hostContext.getLeaseManager().getLease(info.getPartitionId())
+                             .thenComposeAsync((lease) -> {
+                                 throwIfClosingOrClosed("PartitionScanner is shutting down");
+                                 holder.setAcquiredLease(lease);
+                                 return this.hostContext.getLeaseManager().acquireLease(lease);
+                             }, this.hostContext.getExecutor())
+                             .thenAcceptAsync((acquired) -> {
+                                 throwIfClosingOrClosed("PartitionScanner is shutting down");
+                                 if (acquired) {
+                                     runningNeeded.decrementAndGet();
+                                     TRACE_LOGGER.debug(this.hostContext.withHostAndPartition(holder.getAcquiredLease().getPartitionId(), "Acquired unowned/expired"));
+                                     if (this.leasesOwnedByOthers.containsKey(holder.getAcquiredLease().getPartitionId())) {
+                                         this.leasesOwnedByOthers.remove(holder.getAcquiredLease().getPartitionId());
+                                         this.unownedCount--;
+                                     }
+                                     this.addPump.accept(holder.getAcquiredLease());
+                                 } else {
+                                     // Someone else got it first; remember it as owned by another host.
+                                     this.leasesOwnedByOthers.put(holder.getAcquiredLease().getPartitionId(), holder.getAcquiredLease());
+                                 }
+                             }, this.hostContext.getExecutor());
+                         getFutures.add(getOneFuture);
+                     }
+                     // Wait for every acquisition attempt in this chunk.
+                     CompletableFuture<?>[] dummy = new CompletableFuture<?>[getFutures.size()];
+                     acquireFuture = CompletableFuture.allOf(getFutures.toArray(dummy));
+                 }
+                 return acquireFuture;
+             }, this.hostContext.getExecutor())
+             .handleAsync((empty, e) -> {
+                 // log/notify if exception occurred, then swallow exception and continue with next chunk
+                 if ((e != null) && !(e instanceof ClosingException)) {
+                     Exception notifyWith = (Exception) LoggingUtils.unwrapException(e, null);
+                     TRACE_LOGGER.warn(this.hostContext.withHost("Failure getting/acquiring lease, continuing"), notifyWith);
+                     this.hostContext.getEventProcessorOptions().notifyOfException(this.hostContext.getHostName(), notifyWith,
+                         EventProcessorHostActionStrings.CHECKING_LEASES, ExceptionReceivedEventArgs.NO_ASSOCIATED_PARTITION);
+                 }
+                 return null;
+             }, this.hostContext.getExecutor())
+             .thenComposeAsync((unused) -> acquireExpiredInChunksParallel(endAt, runningNeeded.get()), this.hostContext.getExecutor());
+     } else {
+         TRACE_LOGGER.debug(this.hostContext.withHost("Short circuit: needed is 0, unowned is 0, or off end"));
+     }
+
+     return resultFuture;
+ }
+
+ /**
+  * Non-blocking. Chooses leases to steal from a host that owns more than this.desiredCount leases.
+  * Ownership counts come from this.leasesOwnedByOthers; one such "big owner" is picked at random
+  * and its leases are taken from this.allLeaseStates in list order.
+  *
+  * Note that the loop adds a lease before comparing against the steal count, so whenever a big
+  * owner is found and owns a lease in this.allLeaseStates, at least one lease is returned -- even
+  * when the computed steal count is 0.
+  *
+  * @param stealAsk the number of leases this host would like to steal
+  * @return the leases chosen for stealing; empty when no host owns more than the desired count
+  */
+ private ArrayList<BaseLease> findLeasesToSteal(int stealAsk) {
+     // Generate a map of hostnames and owned counts.
+     HashMap<String, Integer> hostOwns = new HashMap<String, Integer>();
+     for (BaseLease info : this.leasesOwnedByOthers.values()) {
+         hostOwns.merge(info.getOwner(), 1, Integer::sum);
+     }
+
+     // Extract hosts which own more than the desired count
+     ArrayList<String> bigOwners = new ArrayList<String>();
+     for (Map.Entry<String, Integer> pair : hostOwns.entrySet()) {
+         if (pair.getValue() > this.desiredCount) {
+             bigOwners.add(pair.getKey());
+             TRACE_LOGGER.debug(this.hostContext.withHost("Big owner " + pair.getKey() + " has " + pair.getValue()));
+         }
+     }
+
+     ArrayList<BaseLease> stealInfos = new ArrayList<BaseLease>();
+
+     if (bigOwners.size() > 0) {
+         // Randomly pick one of the big owners
+         String bigVictim = bigOwners.get(PartitionScanner.randomizer.nextInt(bigOwners.size()));
+         // The victim's surplus over the desired count, less one.
+         int victimExtra = hostOwns.get(bigVictim) - this.desiredCount - 1;
+         int stealCount = Math.min(victimExtra, stealAsk);
+         TRACE_LOGGER.debug(this.hostContext.withHost("Stealing " + stealCount + " from " + bigVictim));
+
+         // Grab stealCount partitions owned by bigVictim and return the infos.
+         for (BaseLease candidate : this.allLeaseStates) {
+             if (candidate.getOwner() != null && candidate.getOwner().compareTo(bigVictim) == 0) {
+                 stealInfos.add(candidate);
+                 if (stealInfos.size() >= stealCount) {
+                     break;
+                 }
+             }
+         }
+     } else {
+         TRACE_LOGGER.debug(this.hostContext.withHost("No big owners found, skipping steal"));
+     }
+
+     return stealInfos;
+ }
+
+ private CompletableFuture stealLeases(List stealThese) {
+ CompletableFuture