From 0b33f0027c1d7f60d9d6bc66b882c432cd4668fe Mon Sep 17 00:00:00 2001 From: Pranavi Ancha Date: Fri, 5 Dec 2025 15:22:45 +0530 Subject: [PATCH 1/4] Document performance characteristics and safe usage patterns for messaging API --- .../apache/helix/ClusterMessagingService.java | 30 +++ .../main/java/org/apache/helix/Criteria.java | 96 ++++++++- .../helix/messaging/CriteriaEvaluator.java | 45 ++++ website/0.9.9/src/site/markdown/Features.md | 195 +++++++++++++++--- .../src/site/markdown/tutorial_messaging.md | 20 ++ website/1.3.2/src/site/markdown/Features.md | 195 +++++++++++++++--- .../src/site/markdown/tutorial_messaging.md | 20 ++ website/1.4.3/src/site/markdown/Features.md | 195 +++++++++++++++--- .../src/site/markdown/tutorial_messaging.md | 20 ++ 9 files changed, 742 insertions(+), 74 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java index f31f0e397a..e44d40392f 100644 --- a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java +++ b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java @@ -37,6 +37,13 @@ public interface ClusterMessagingService { /** * Send message matching the specifications mentioned in recipientCriteria. + * + *

PERFORMANCE WARNING: When recipientCriteria uses {@link DataSource#EXTERNALVIEW} + * with wildcard or unspecified resource names, this scans ALL ExternalView znodes in the cluster, + * regardless of other criteria like instanceName. At scale (thousands of resources), this causes + * severe performance degradation. Use {@link DataSource#LIVEINSTANCES} when you don't need + * resource/partition filtering, or specify exact resource names when using EXTERNALVIEW. + * * @param recipientCriteria criteria to be met, defined as {@link Criteria} * @See Criteria * @param message @@ -54,6 +61,11 @@ public interface ClusterMessagingService { * This method will return after sending the messages.
* This is useful when message need to be sent and current thread need not * wait for response since processing will be done in another thread. + * + *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. + * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes + * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. + * * @see #send(Criteria, Message) * @param recipientCriteria * @param message @@ -64,6 +76,10 @@ public interface ClusterMessagingService { int send(Criteria recipientCriteria, Message message, AsyncCallback callbackOnReply, int timeOut); /** + *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. + * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes + * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. + * * @see #send(Criteria, Message, AsyncCallback, int) * @param recipientCriteria * @param message @@ -85,6 +101,11 @@ int send(Criteria recipientCriteria, Message message, AsyncCallback callbackOnRe * for response.
* The current thread can use callbackOnReply instance to store application * specific data. + * + *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. + * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes + * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. + * * @see #send(Criteria, Message, AsyncCallback, int) * @param recipientCriteria * @param message @@ -96,6 +117,10 @@ int sendAndWait(Criteria recipientCriteria, Message message, AsyncCallback callb int timeOut); /** + *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. + * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes + * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. + * * @see #send(Criteria, Message, AsyncCallback, int, int) * @param receipientCriteria * @param message @@ -143,6 +168,11 @@ int sendAndWait(Criteria receipientCriteria, Message message, AsyncCallback call /** * This will generate all messages to be sent given the recipientCriteria and MessageTemplate, * the messages are not sent. + * + *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. + * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes + * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. + * * @param recipientCriteria criteria to be met, defined as {@link Criteria} * @param messageTemplate the Message on which to base the messages to send * @return messages to be sent, grouped by the type of instance to send the message to diff --git a/helix-core/src/main/java/org/apache/helix/Criteria.java b/helix-core/src/main/java/org/apache/helix/Criteria.java index d01228db83..61d495721c 100644 --- a/helix-core/src/main/java/org/apache/helix/Criteria.java +++ b/helix-core/src/main/java/org/apache/helix/Criteria.java @@ -20,9 +20,78 @@ */ /** - * Describes various properties that operations involving {@link Message} delivery will follow. + * Specifies recipient criteria for message delivery in a Helix cluster. + * + *

The {@link Criteria} object defines which instances should receive a message by specifying + * attributes like instance name, resource, partition, and state. The most critical configuration + * is {@link DataSource}, which determines where Helix looks up cluster state to resolve recipients. + * + *

PERFORMANCE WARNING: Using {@link DataSource#EXTERNALVIEW} with wildcard or unspecified + * resource names causes Helix to scan ALL ExternalView znodes in the cluster, regardless of other + * criteria fields. At scale (thousands of resources), this causes severe performance degradation. + * + *

Quick Start - Common Patterns: + *

+ * // Pattern 1: Send to specific live instance (most efficient)
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("host_1234");
+ * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+ * criteria.setDataSource(DataSource.LIVEINSTANCES);
+ * criteria.setSessionSpecific(true);
+ * 
+ * // Pattern 2: Send to all replicas of a specific partition
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("%");
+ * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+ * criteria.setDataSource(DataSource.EXTERNALVIEW);
+ * criteria.setResource("MyDatabase");  // IMPORTANT: Specify exact resource name
+ * criteria.setPartition("MyDatabase_5");
+ * criteria.setSessionSpecific(true);
+ * 
+ * // Pattern 3: Broadcast to all live instances
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("%");
+ * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+ * criteria.setDataSource(DataSource.LIVEINSTANCES);
+ * criteria.setSessionSpecific(true);
+ * 
+ * + *

DataSource Selection Guide: + *

+ * + * @see ClusterMessagingService#send(Criteria, org.apache.helix.model.Message) + * @see org.apache.helix.messaging.CriteriaEvaluator */ public class Criteria { + /** + * Specifies the source of cluster state information for resolving message recipients. + * + *

The DataSource determines which ZooKeeper znodes Helix reads to match the criteria: + *

+ * + *

Performance Impact: LIVEINSTANCES is fastest as it reads minimal data. EXTERNALVIEW + * and IDEALSTATES can be slow at scale if wildcards are used in resource names, as Helix must + * read and deserialize all resource znodes to match the criteria. + */ public enum DataSource { IDEALSTATES, EXTERNALVIEW, @@ -80,8 +149,17 @@ public DataSource getDataSource() { } /** - * Set the current source of truth - * @param source ideal state or external view + * Set the current source of truth for resolving message recipients. + * + *

PERFORMANCE GUIDANCE: + *

+ * + * @param source ideal state, external view, live instances, or instances */ public void setDataSource(DataSource source) { _dataSource = source; @@ -161,8 +239,16 @@ public String getResource() { } /** - * Set the destination resource name - * @param resourceName the resource name or % for all resources + * Set the destination resource name. + * + *

Note: This field is only meaningful when using {@link DataSource#EXTERNALVIEW} or + * {@link DataSource#IDEALSTATES}. It is ignored for LIVEINSTANCES and INSTANCES. + * + *

PERFORMANCE: When using EXTERNALVIEW, specifying an exact resource name (e.g., "MyDatabase") + * reads only that resource's ExternalView znode. Using wildcard "%" reads ALL ExternalView znodes + * in the cluster, which can cause severe performance issues at scale. + * + * @param resourceName the exact resource name, or "%" for all resources (avoid wildcard at scale) */ public void setResource(String resourceName) { this.resourceName = resourceName; diff --git a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java index f0e9ef58ff..2e56b26af7 100644 --- a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java +++ b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java @@ -39,12 +39,51 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Evaluates {@link Criteria} against persisted Helix data to determine message recipients. + * + *

PERFORMANCE WARNING: When using {@link DataSource#EXTERNALVIEW}, this evaluator + * will scan all ExternalView znodes in the cluster if the resource name is unspecified or uses wildcards + * (e.g., "%" or "*"). This scanning happens even when targeting specific instances, and is + * NOT automatically optimized based on other criteria fields (like instanceName). + * + *

At high ExternalView cardinality (thousands of resources), this can cause severe performance degradation. + * + *

Safer Patterns: + *

+ * + *

Example - Targeting a specific instance: + *

+ * // BAD: Scans all ExternalViews even though instance is specified
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("instance123");
+ * criteria.setDataSource(DataSource.EXTERNALVIEW);
+ * criteria.setResource("%"); // wildcard triggers full scan
+ * 
+ * // GOOD: Uses LIVEINSTANCES, avoids ExternalView scan
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("instance123");
+ * criteria.setDataSource(DataSource.LIVEINSTANCES);
+ * 
+ */ public class CriteriaEvaluator { private static Logger logger = LoggerFactory.getLogger(CriteriaEvaluator.class); public static final String MATCH_ALL_SYM = "%"; /** * Examine persisted data to match wildcards in {@link Criteria} + * + *

PERFORMANCE WARNING: Using {@link DataSource#EXTERNALVIEW} with wildcard resource + * names (or unspecified resource) will scan ALL ExternalView znodes, even when targeting specific + * instances. At high cardinality, this can cause severe performance degradation. Prefer + * {@link DataSource#LIVEINSTANCES} when resource/partition filtering is not needed. + * * @param recipientCriteria Criteria specifying the message destinations * @param manager connection to the persisted data * @return map of evaluated criteria @@ -56,6 +95,12 @@ public List> evaluateCriteria(Criteria recipientCriteria, /** * Examine persisted data to match wildcards in {@link Criteria} + * + *

PERFORMANCE WARNING: Using {@link DataSource#EXTERNALVIEW} with wildcard resource + * names (or unspecified resource) will scan ALL ExternalView znodes, even when targeting specific + * instances. At high cardinality, this can cause severe performance degradation. Prefer + * {@link DataSource#LIVEINSTANCES} when resource/partition filtering is not needed. + * * @param recipientCriteria Criteria specifying the message destinations * @param accessor connection to the persisted data * @return map of evaluated criteria diff --git a/website/0.9.9/src/site/markdown/Features.md b/website/0.9.9/src/site/markdown/Features.md index 476956a637..527621425b 100644 --- a/website/0.9.9/src/site/markdown/Features.md +++ b/website/0.9.9/src/site/markdown/Features.md @@ -220,30 +220,179 @@ Since Helix is aware of the global state of the system, it can send the message This is a very generic api and can also be used to schedule various periodic tasks in the cluster like data backups etc. System Admins can also perform adhoc tasks like on demand backup or execute a system command(like rm -rf ;-)) across all nodes. +#### Understanding Criteria and DataSource + +The `Criteria` object allows you to specify message recipients using various attributes. A critical configuration is the `DataSource`, which determines where Helix looks up the cluster state to resolve your criteria. + +**Available DataSource Options:** + +Helix provides four DataSource types, each reading from different znodes in ZooKeeper: + +| DataSource | Description | When to Use | Performance Characteristics | +|------------|-------------|-------------|----------------------------| +| **LIVEINSTANCES** | Reads from `/LIVEINSTANCES` znodes | Targeting live instances without needing resource/partition/state filtering | **Fastest** - Minimal data, only active instances | +| **INSTANCES** | Reads from `/INSTANCES/[instance]` znodes | Targeting specific configured instances (live or not) based on instance configuration | Fast - Reads instance configs only | +| **EXTERNALVIEW** | Reads from `/EXTERNALVIEWS/[resource]` znodes | Targeting based on actual replica placement, partition ownership, or replica state (MASTER/SLAVE) | **Slowest** - Can read thousands of znodes if wildcards used | +| **IDEALSTATES** | Reads from `/IDEALSTATES/[resource]` znodes | Targeting based on ideal state configuration (intended placement) | Moderate - Similar to ExternalView but less commonly used | + +**Key Differences:** + +- **LIVEINSTANCES**: Contains only instance names of currently connected participants. No resource/partition information. Smallest dataset. +- **INSTANCES**: Contains instance configuration (host, port, enabled/disabled status). No resource/partition information. +- **EXTERNALVIEW**: Contains actual current state - which instances own which partitions and their states (MASTER/SLAVE/OFFLINE). Large dataset at scale. +- **IDEALSTATES**: Contains desired state - which instances should own which partitions. Similar size to ExternalView. + +**Choosing the Right DataSource:** + +| Your Goal | Correct DataSource | Example Use Case | +|-----------|-------------------|------------------| +| Send to specific live instance(s) | `LIVEINSTANCES` | Health check, admin command to specific node | +| Send to all live instances | `LIVEINSTANCES` | Broadcast announcement, cluster-wide operation | +| Send to replicas of a specific partition | `EXTERNALVIEW` (with exact resource name) | Bootstrap replica from peers | +| Send to all MASTER replicas of a resource | `EXTERNALVIEW` (with exact resource name) | Trigger operation on masters only | +| Send based on partition state | `EXTERNALVIEW` (with exact resource name) | Target only ONLINE/MASTER/SLAVE replicas | + +#### CRITICAL: Performance Considerations + +**⚠️ WARNING:** Using `EXTERNALVIEW` as the DataSource can cause severe performance issues at scale. + +**The Problem:** +When using `DataSource.EXTERNALVIEW`, Helix will scan **ALL** ExternalView znodes in the cluster if: +- You use wildcards (`%` or `*`) in the resource name, OR +- You leave the resource name unspecified + +**This happens even when targeting specific instances!** The scan is NOT automatically optimized based on other criteria fields like `instanceName`. + +At high ExternalView cardinality (thousands of resources), this can cause severe performance degradation. + +#### How to Set Criteria Correctly + +**Pattern 1: Targeting Specific Instances (Most Common)** + +When you only need to send messages to specific instances and don't need resource/partition-level filtering: + +```java +// GOOD: Efficient - Uses LIVEINSTANCES, avoids ExternalView scan +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); // or "%" for all live instances +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Key: Use LIVEINSTANCES +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Scans ALL ExternalViews even though targeting specific instance +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); // Will scan ALL resources! +criteria.setResource("%"); // Wildcard triggers full scan +``` + +**Pattern 2: Targeting Specific Resource and Partition** + +When you need to send messages based on resource ownership (e.g., all replicas of a partition): + +```java +// GOOD: Efficient - Specifies exact resource name, scans only 1 ExternalView +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("MyDB"); // Exact resource name - scans only this EV +criteria.setPartition("MyDB_0"); // Specific partition +criteria.setPartitionState("MASTER"); // Only send to masters +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Wildcard resource scans ALL ExternalViews +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("%"); // Wildcard scans ALL ExternalViews in cluster! +criteria.setPartition("MyDB_0"); +criteria.setSessionSpecific(true); ``` - ClusterMessagingService messagingService = manager.getMessagingService(); - //CONSTRUCT THE MESSAGE - Message requestBackupUriRequest = new Message( - MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); - requestBackupUriRequest - .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); - requestBackupUriRequest.setMsgState(MessageState.NEW); - //SET THE RECIPIENT CRITERIA, All nodes that satisfy the criteria will receive the message - Criteria recipientCriteria = new Criteria(); - recipientCriteria.setInstanceName("%"); - recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); - recipientCriteria.setResource("MyDB"); - recipientCriteria.setPartition(""); - //Should be processed only the process that is active at the time of sending the message. - //This means if the recipient is restarted after message is sent, it will not be processed. - recipientCriteria.setSessionSpecific(true); - // wait for 30 seconds - int timeout = 30000; - //The handler that will be invoked when any recipient responds to the message. - BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); - //This will return only after all recipients respond or after timeout. - int sentMessageCount = messagingService.sendAndWait(recipientCriteria, - requestBackupUriRequest, responseHandler, timeout); + +**Pattern 3: Broadcasting to All Live Instances** + +```java +// GOOD: Efficient broadcast to all live participants +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Fast broadcast +criteria.setSessionSpecific(true); +``` + +#### Criteria Configuration Reference + +The `Criteria` class provides the following configuration methods: + +| Method | Parameter | Description | Wildcard Support | Example | +|--------|-----------|-------------|------------------|---------| +| `setDataSource(DataSource)` | LIVEINSTANCES, INSTANCES, EXTERNALVIEW, IDEALSTATES | **MOST IMPORTANT:** Determines which znodes to read | N/A | `DataSource.LIVEINSTANCES` | +| `setInstanceName(String)` | Instance name | Target specific instance(s) by name | Yes (`%` = all) | `"localhost_12918"` or `"%"` | +| `setResource(String)` | Resource name | Filter by resource name (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase"` or `"%"` | +| `setPartition(String)` | Partition name | Filter by specific partition (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase_0"` or `"%"` | +| `setPartitionState(String)` | State name | Filter by replica state like MASTER, SLAVE, ONLINE, OFFLINE (only for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MASTER"` or `"%"` | +| `setRecipientInstanceType(InstanceType)` | PARTICIPANT, CONTROLLER, SPECTATOR | Type of Helix process to target | No | `InstanceType.PARTICIPANT` | +| `setSessionSpecific(boolean)` | true/false | If true, message is only delivered to currently active sessions (not redelivered after restart) | No | `true` (recommended) | + +**Important Notes:** + +- **Wildcards:** Use `%` (SQL-style) or `*` to match all. Single underscore `_` matches any single character. +- **DataSource Compatibility:** Setting `resource`, `partition`, or `partitionState` only makes sense with `EXTERNALVIEW` or `IDEALSTATES` DataSource. They are ignored for `LIVEINSTANCES` and `INSTANCES`. +- **Session-Specific:** Set to `true` for most use cases to avoid redelivering messages after a participant restarts. +- **Empty vs Wildcard:** Empty string `""` and wildcard `"%"` are treated the same - both match all. + +**DataSource and Criteria Field Compatibility:** + +| DataSource | Works With | Ignores | +|------------|-----------|---------| +| LIVEINSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| INSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| EXTERNALVIEW | All fields | - | +| IDEALSTATES | All fields | - | + +**Key Rules:** +1. **Always set DataSource explicitly** - Don't rely on defaults +2. **Use LIVEINSTANCES when possible** - Avoid ExternalView unless you need resource/partition filtering +3. **Specify exact resource names** - Avoid wildcards when using EXTERNALVIEW +4. **Test at scale** - Performance degrades non-linearly with resource count + +#### Example: Bootstrap Replica from Peers + +```java +ClusterMessagingService messagingService = manager.getMessagingService(); + +// Construct the message +Message requestBackupUriRequest = new Message( + MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); +requestBackupUriRequest + .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); +requestBackupUriRequest.setMsgState(MessageState.NEW); + +// Set recipient criteria to find all replicas of specific partition +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("%"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.EXTERNALVIEW); // Need resource-level info +recipientCriteria.setResource("MyDB"); // IMPORTANT: Specify exact resource, not "%" +recipientCriteria.setPartition("MyDB_0"); // Target specific partition +recipientCriteria.setSessionSpecific(true); + +// Wait for 30 seconds +int timeout = 30000; + +// Handler invoked when any recipient responds +BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); + +// Returns only after all recipients respond or after timeout +int sentMessageCount = messagingService.sendAndWait(recipientCriteria, + requestBackupUriRequest, responseHandler, timeout); ``` See HelixManager.getMessagingService for more info. diff --git a/website/0.9.9/src/site/markdown/tutorial_messaging.md b/website/0.9.9/src/site/markdown/tutorial_messaging.md index 0b32bdac17..d0b85eaab4 100644 --- a/website/0.9.9/src/site/markdown/tutorial_messaging.md +++ b/website/0.9.9/src/site/markdown/tutorial_messaging.md @@ -25,6 +25,26 @@ under the License. In this chapter, we\'ll learn about messaging, a convenient feature in Helix for sending messages between nodes of a cluster. This is an interesting feature that is quite useful in practice. It is common that nodes in a distributed system require a mechanism to interact with each other. +### Performance Considerations + +**IMPORTANT:** When using the messaging API with `Criteria`, be aware of the following performance characteristics: + +- **ExternalView Scanning:** By default, the messaging service uses `DataSource.EXTERNALVIEW` to resolve criteria. This can scan **all** ExternalView znodes in the cluster, even when targeting specific instances. At high resource cardinality (thousands of resources), this can cause severe performance degradation. + +**Recommended Patterns:** + +- **Use `DataSource.LIVEINSTANCES`** when you only need to target live instances and do not require resource/partition-level filtering. This is much faster and more efficient. +- **Specify exact resource names** instead of wildcards if you must use ExternalView scanning. + +Example of efficient messaging: +```java +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("instance123"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.LIVEINSTANCES); // Efficient: avoids EV scan +recipientCriteria.setSessionSpecific(true); +``` + ### Example: Bootstrapping a Replica Consider a search system where the index replica starts up and it does not have an index. A typical solution is to get the index from a common location, or to copy the index from another replica. diff --git a/website/1.3.2/src/site/markdown/Features.md b/website/1.3.2/src/site/markdown/Features.md index 476956a637..527621425b 100644 --- a/website/1.3.2/src/site/markdown/Features.md +++ b/website/1.3.2/src/site/markdown/Features.md @@ -220,30 +220,179 @@ Since Helix is aware of the global state of the system, it can send the message This is a very generic api and can also be used to schedule various periodic tasks in the cluster like data backups etc. System Admins can also perform adhoc tasks like on demand backup or execute a system command(like rm -rf ;-)) across all nodes. +#### Understanding Criteria and DataSource + +The `Criteria` object allows you to specify message recipients using various attributes. A critical configuration is the `DataSource`, which determines where Helix looks up the cluster state to resolve your criteria. + +**Available DataSource Options:** + +Helix provides four DataSource types, each reading from different znodes in ZooKeeper: + +| DataSource | Description | When to Use | Performance Characteristics | +|------------|-------------|-------------|----------------------------| +| **LIVEINSTANCES** | Reads from `/LIVEINSTANCES` znodes | Targeting live instances without needing resource/partition/state filtering | **Fastest** - Minimal data, only active instances | +| **INSTANCES** | Reads from `/INSTANCES/[instance]` znodes | Targeting specific configured instances (live or not) based on instance configuration | Fast - Reads instance configs only | +| **EXTERNALVIEW** | Reads from `/EXTERNALVIEWS/[resource]` znodes | Targeting based on actual replica placement, partition ownership, or replica state (MASTER/SLAVE) | **Slowest** - Can read thousands of znodes if wildcards used | +| **IDEALSTATES** | Reads from `/IDEALSTATES/[resource]` znodes | Targeting based on ideal state configuration (intended placement) | Moderate - Similar to ExternalView but less commonly used | + +**Key Differences:** + +- **LIVEINSTANCES**: Contains only instance names of currently connected participants. No resource/partition information. Smallest dataset. +- **INSTANCES**: Contains instance configuration (host, port, enabled/disabled status). No resource/partition information. +- **EXTERNALVIEW**: Contains actual current state - which instances own which partitions and their states (MASTER/SLAVE/OFFLINE). Large dataset at scale. +- **IDEALSTATES**: Contains desired state - which instances should own which partitions. Similar size to ExternalView. + +**Choosing the Right DataSource:** + +| Your Goal | Correct DataSource | Example Use Case | +|-----------|-------------------|------------------| +| Send to specific live instance(s) | `LIVEINSTANCES` | Health check, admin command to specific node | +| Send to all live instances | `LIVEINSTANCES` | Broadcast announcement, cluster-wide operation | +| Send to replicas of a specific partition | `EXTERNALVIEW` (with exact resource name) | Bootstrap replica from peers | +| Send to all MASTER replicas of a resource | `EXTERNALVIEW` (with exact resource name) | Trigger operation on masters only | +| Send based on partition state | `EXTERNALVIEW` (with exact resource name) | Target only ONLINE/MASTER/SLAVE replicas | + +#### CRITICAL: Performance Considerations + +**⚠️ WARNING:** Using `EXTERNALVIEW` as the DataSource can cause severe performance issues at scale. + +**The Problem:** +When using `DataSource.EXTERNALVIEW`, Helix will scan **ALL** ExternalView znodes in the cluster if: +- You use wildcards (`%` or `*`) in the resource name, OR +- You leave the resource name unspecified + +**This happens even when targeting specific instances!** The scan is NOT automatically optimized based on other criteria fields like `instanceName`. + +At high ExternalView cardinality (thousands of resources), this can cause severe performance degradation. + +#### How to Set Criteria Correctly + +**Pattern 1: Targeting Specific Instances (Most Common)** + +When you only need to send messages to specific instances and don't need resource/partition-level filtering: + +```java +// GOOD: Efficient - Uses LIVEINSTANCES, avoids ExternalView scan +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); // or "%" for all live instances +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Key: Use LIVEINSTANCES +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Scans ALL ExternalViews even though targeting specific instance +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); // Will scan ALL resources! +criteria.setResource("%"); // Wildcard triggers full scan +``` + +**Pattern 2: Targeting Specific Resource and Partition** + +When you need to send messages based on resource ownership (e.g., all replicas of a partition): + +```java +// GOOD: Efficient - Specifies exact resource name, scans only 1 ExternalView +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("MyDB"); // Exact resource name - scans only this EV +criteria.setPartition("MyDB_0"); // Specific partition +criteria.setPartitionState("MASTER"); // Only send to masters +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Wildcard resource scans ALL ExternalViews +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("%"); // Wildcard scans ALL ExternalViews in cluster! +criteria.setPartition("MyDB_0"); +criteria.setSessionSpecific(true); ``` - ClusterMessagingService messagingService = manager.getMessagingService(); - //CONSTRUCT THE MESSAGE - Message requestBackupUriRequest = new Message( - MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); - requestBackupUriRequest - .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); - requestBackupUriRequest.setMsgState(MessageState.NEW); - //SET THE RECIPIENT CRITERIA, All nodes that satisfy the criteria will receive the message - Criteria recipientCriteria = new Criteria(); - recipientCriteria.setInstanceName("%"); - recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); - recipientCriteria.setResource("MyDB"); - recipientCriteria.setPartition(""); - //Should be processed only the process that is active at the time of sending the message. - //This means if the recipient is restarted after message is sent, it will not be processed. - recipientCriteria.setSessionSpecific(true); - // wait for 30 seconds - int timeout = 30000; - //The handler that will be invoked when any recipient responds to the message. - BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); - //This will return only after all recipients respond or after timeout. - int sentMessageCount = messagingService.sendAndWait(recipientCriteria, - requestBackupUriRequest, responseHandler, timeout); + +**Pattern 3: Broadcasting to All Live Instances** + +```java +// GOOD: Efficient broadcast to all live participants +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Fast broadcast +criteria.setSessionSpecific(true); +``` + +#### Criteria Configuration Reference + +The `Criteria` class provides the following configuration methods: + +| Method | Parameter | Description | Wildcard Support | Example | +|--------|-----------|-------------|------------------|---------| +| `setDataSource(DataSource)` | LIVEINSTANCES, INSTANCES, EXTERNALVIEW, IDEALSTATES | **MOST IMPORTANT:** Determines which znodes to read | N/A | `DataSource.LIVEINSTANCES` | +| `setInstanceName(String)` | Instance name | Target specific instance(s) by name | Yes (`%` = all) | `"localhost_12918"` or `"%"` | +| `setResource(String)` | Resource name | Filter by resource name (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase"` or `"%"` | +| `setPartition(String)` | Partition name | Filter by specific partition (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase_0"` or `"%"` | +| `setPartitionState(String)` | State name | Filter by replica state like MASTER, SLAVE, ONLINE, OFFLINE (only for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MASTER"` or `"%"` | +| `setRecipientInstanceType(InstanceType)` | PARTICIPANT, CONTROLLER, SPECTATOR | Type of Helix process to target | No | `InstanceType.PARTICIPANT` | +| `setSessionSpecific(boolean)` | true/false | If true, message is only delivered to currently active sessions (not redelivered after restart) | No | `true` (recommended) | + +**Important Notes:** + +- **Wildcards:** Use `%` (SQL-style) or `*` to match all. Single underscore `_` matches any single character. +- **DataSource Compatibility:** Setting `resource`, `partition`, or `partitionState` only makes sense with `EXTERNALVIEW` or `IDEALSTATES` DataSource. They are ignored for `LIVEINSTANCES` and `INSTANCES`. +- **Session-Specific:** Set to `true` for most use cases to avoid redelivering messages after a participant restarts. +- **Empty vs Wildcard:** Empty string `""` and wildcard `"%"` are treated the same - both match all. + +**DataSource and Criteria Field Compatibility:** + +| DataSource | Works With | Ignores | +|------------|-----------|---------| +| LIVEINSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| INSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| EXTERNALVIEW | All fields | - | +| IDEALSTATES | All fields | - | + +**Key Rules:** +1. **Always set DataSource explicitly** - Don't rely on defaults +2. **Use LIVEINSTANCES when possible** - Avoid ExternalView unless you need resource/partition filtering +3. **Specify exact resource names** - Avoid wildcards when using EXTERNALVIEW +4. **Test at scale** - Performance degrades non-linearly with resource count + +#### Example: Bootstrap Replica from Peers + +```java +ClusterMessagingService messagingService = manager.getMessagingService(); + +// Construct the message +Message requestBackupUriRequest = new Message( + MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); +requestBackupUriRequest + .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); +requestBackupUriRequest.setMsgState(MessageState.NEW); + +// Set recipient criteria to find all replicas of specific partition +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("%"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.EXTERNALVIEW); // Need resource-level info +recipientCriteria.setResource("MyDB"); // IMPORTANT: Specify exact resource, not "%" +recipientCriteria.setPartition("MyDB_0"); // Target specific partition +recipientCriteria.setSessionSpecific(true); + +// Wait for 30 seconds +int timeout = 30000; + +// Handler invoked when any recipient responds +BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); + +// Returns only after all recipients respond or after timeout +int sentMessageCount = messagingService.sendAndWait(recipientCriteria, + requestBackupUriRequest, responseHandler, timeout); ``` See HelixManager.getMessagingService for more info. diff --git a/website/1.3.2/src/site/markdown/tutorial_messaging.md b/website/1.3.2/src/site/markdown/tutorial_messaging.md index bdbd936730..7a574c1e92 100644 --- a/website/1.3.2/src/site/markdown/tutorial_messaging.md +++ b/website/1.3.2/src/site/markdown/tutorial_messaging.md @@ -25,6 +25,26 @@ under the License. In this chapter, we\'ll learn about messaging, a convenient feature in Helix for sending messages between nodes of a cluster. This is an interesting feature that is quite useful in practice. It is common that nodes in a distributed system require a mechanism to interact with each other. +### Performance Considerations + +**IMPORTANT:** When using the messaging API with `Criteria`, be aware of the following performance characteristics: + +- **ExternalView Scanning:** By default, the messaging service uses `DataSource.EXTERNALVIEW` to resolve criteria. This can scan **all** ExternalView znodes in the cluster, even when targeting specific instances. At high resource cardinality (thousands of resources), this can cause severe performance degradation. + +**Recommended Patterns:** + +- **Use `DataSource.LIVEINSTANCES`** when you only need to target live instances and do not require resource/partition-level filtering. This is much faster and more efficient. +- **Specify exact resource names** instead of wildcards if you must use ExternalView scanning. + +Example of efficient messaging: +```java +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("instance123"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.LIVEINSTANCES); // Efficient: avoids EV scan +recipientCriteria.setSessionSpecific(true); +``` + ### Example: Bootstrapping a Replica Consider a search system where the index replica starts up and it does not have an index. A typical solution is to get the index from a common location, or to copy the index from another replica. diff --git a/website/1.4.3/src/site/markdown/Features.md b/website/1.4.3/src/site/markdown/Features.md index 476956a637..527621425b 100644 --- a/website/1.4.3/src/site/markdown/Features.md +++ b/website/1.4.3/src/site/markdown/Features.md @@ -220,30 +220,179 @@ Since Helix is aware of the global state of the system, it can send the message This is a very generic api and can also be used to schedule various periodic tasks in the cluster like data backups etc. System Admins can also perform adhoc tasks like on demand backup or execute a system command(like rm -rf ;-)) across all nodes. +#### Understanding Criteria and DataSource + +The `Criteria` object allows you to specify message recipients using various attributes. A critical configuration is the `DataSource`, which determines where Helix looks up the cluster state to resolve your criteria. + +**Available DataSource Options:** + +Helix provides four DataSource types, each reading from different znodes in ZooKeeper: + +| DataSource | Description | When to Use | Performance Characteristics | +|------------|-------------|-------------|----------------------------| +| **LIVEINSTANCES** | Reads from `/LIVEINSTANCES` znodes | Targeting live instances without needing resource/partition/state filtering | **Fastest** - Minimal data, only active instances | +| **INSTANCES** | Reads from `/INSTANCES/[instance]` znodes | Targeting specific configured instances (live or not) based on instance configuration | Fast - Reads instance configs only | +| **EXTERNALVIEW** | Reads from `/EXTERNALVIEWS/[resource]` znodes | Targeting based on actual replica placement, partition ownership, or replica state (MASTER/SLAVE) | **Slowest** - Can read thousands of znodes if wildcards used | +| **IDEALSTATES** | Reads from `/IDEALSTATES/[resource]` znodes | Targeting based on ideal state configuration (intended placement) | Moderate - Similar to ExternalView but less commonly used | + +**Key Differences:** + +- **LIVEINSTANCES**: Contains only instance names of currently connected participants. No resource/partition information. Smallest dataset. +- **INSTANCES**: Contains instance configuration (host, port, enabled/disabled status). No resource/partition information. +- **EXTERNALVIEW**: Contains actual current state - which instances own which partitions and their states (MASTER/SLAVE/OFFLINE). Large dataset at scale. +- **IDEALSTATES**: Contains desired state - which instances should own which partitions. Similar size to ExternalView. + +**Choosing the Right DataSource:** + +| Your Goal | Correct DataSource | Example Use Case | +|-----------|-------------------|------------------| +| Send to specific live instance(s) | `LIVEINSTANCES` | Health check, admin command to specific node | +| Send to all live instances | `LIVEINSTANCES` | Broadcast announcement, cluster-wide operation | +| Send to replicas of a specific partition | `EXTERNALVIEW` (with exact resource name) | Bootstrap replica from peers | +| Send to all MASTER replicas of a resource | `EXTERNALVIEW` (with exact resource name) | Trigger operation on masters only | +| Send based on partition state | `EXTERNALVIEW` (with exact resource name) | Target only ONLINE/MASTER/SLAVE replicas | + +#### CRITICAL: Performance Considerations + +**⚠️ WARNING:** Using `EXTERNALVIEW` as the DataSource can cause severe performance issues at scale. + +**The Problem:** +When using `DataSource.EXTERNALVIEW`, Helix will scan **ALL** ExternalView znodes in the cluster if: +- You use wildcards (`%` or `*`) in the resource name, OR +- You leave the resource name unspecified + +**This happens even when targeting specific instances!** The scan is NOT automatically optimized based on other criteria fields like `instanceName`. + +At high ExternalView cardinality (thousands of resources), this can cause severe performance degradation. + +#### How to Set Criteria Correctly + +**Pattern 1: Targeting Specific Instances (Most Common)** + +When you only need to send messages to specific instances and don't need resource/partition-level filtering: + +```java +// GOOD: Efficient - Uses LIVEINSTANCES, avoids ExternalView scan +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); // or "%" for all live instances +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Key: Use LIVEINSTANCES +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Scans ALL ExternalViews even though targeting specific instance +Criteria criteria = new Criteria(); +criteria.setInstanceName("instance123"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); // Will scan ALL resources! +criteria.setResource("%"); // Wildcard triggers full scan +``` + +**Pattern 2: Targeting Specific Resource and Partition** + +When you need to send messages based on resource ownership (e.g., all replicas of a partition): + +```java +// GOOD: Efficient - Specifies exact resource name, scans only 1 ExternalView +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("MyDB"); // Exact resource name - scans only this EV +criteria.setPartition("MyDB_0"); // Specific partition +criteria.setPartitionState("MASTER"); // Only send to masters +criteria.setSessionSpecific(true); +``` + +```java +// BAD: Inefficient - Wildcard resource scans ALL ExternalViews +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.EXTERNALVIEW); +criteria.setResource("%"); // Wildcard scans ALL ExternalViews in cluster! +criteria.setPartition("MyDB_0"); +criteria.setSessionSpecific(true); ``` - ClusterMessagingService messagingService = manager.getMessagingService(); - //CONSTRUCT THE MESSAGE - Message requestBackupUriRequest = new Message( - MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); - requestBackupUriRequest - .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); - requestBackupUriRequest.setMsgState(MessageState.NEW); - //SET THE RECIPIENT CRITERIA, All nodes that satisfy the criteria will receive the message - Criteria recipientCriteria = new Criteria(); - recipientCriteria.setInstanceName("%"); - recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); - recipientCriteria.setResource("MyDB"); - recipientCriteria.setPartition(""); - //Should be processed only the process that is active at the time of sending the message. - //This means if the recipient is restarted after message is sent, it will not be processed. - recipientCriteria.setSessionSpecific(true); - // wait for 30 seconds - int timeout = 30000; - //The handler that will be invoked when any recipient responds to the message. - BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); - //This will return only after all recipients respond or after timeout. - int sentMessageCount = messagingService.sendAndWait(recipientCriteria, - requestBackupUriRequest, responseHandler, timeout); + +**Pattern 3: Broadcasting to All Live Instances** + +```java +// GOOD: Efficient broadcast to all live participants +Criteria criteria = new Criteria(); +criteria.setInstanceName("%"); +criteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +criteria.setDataSource(DataSource.LIVEINSTANCES); // Fast broadcast +criteria.setSessionSpecific(true); +``` + +#### Criteria Configuration Reference + +The `Criteria` class provides the following configuration methods: + +| Method | Parameter | Description | Wildcard Support | Example | +|--------|-----------|-------------|------------------|---------| +| `setDataSource(DataSource)` | LIVEINSTANCES, INSTANCES, EXTERNALVIEW, IDEALSTATES | **MOST IMPORTANT:** Determines which znodes to read | N/A | `DataSource.LIVEINSTANCES` | +| `setInstanceName(String)` | Instance name | Target specific instance(s) by name | Yes (`%` = all) | `"localhost_12918"` or `"%"` | +| `setResource(String)` | Resource name | Filter by resource name (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase"` or `"%"` | +| `setPartition(String)` | Partition name | Filter by specific partition (only meaningful for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MyDatabase_0"` or `"%"` | +| `setPartitionState(String)` | State name | Filter by replica state like MASTER, SLAVE, ONLINE, OFFLINE (only for EXTERNALVIEW/IDEALSTATES) | Yes (`%` = all) | `"MASTER"` or `"%"` | +| `setRecipientInstanceType(InstanceType)` | PARTICIPANT, CONTROLLER, SPECTATOR | Type of Helix process to target | No | `InstanceType.PARTICIPANT` | +| `setSessionSpecific(boolean)` | true/false | If true, message is only delivered to currently active sessions (not redelivered after restart) | No | `true` (recommended) | + +**Important Notes:** + +- **Wildcards:** Use `%` (SQL-style) or `*` to match all. Single underscore `_` matches any single character. +- **DataSource Compatibility:** Setting `resource`, `partition`, or `partitionState` only makes sense with `EXTERNALVIEW` or `IDEALSTATES` DataSource. They are ignored for `LIVEINSTANCES` and `INSTANCES`. +- **Session-Specific:** Set to `true` for most use cases to avoid redelivering messages after a participant restarts. +- **Empty vs Wildcard:** Empty string `""` and wildcard `"%"` are treated the same - both match all. + +**DataSource and Criteria Field Compatibility:** + +| DataSource | Works With | Ignores | +|------------|-----------|---------| +| LIVEINSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| INSTANCES | instanceName, recipientInstanceType, sessionSpecific | resource, partition, partitionState | +| EXTERNALVIEW | All fields | - | +| IDEALSTATES | All fields | - | + +**Key Rules:** +1. **Always set DataSource explicitly** - Don't rely on defaults +2. **Use LIVEINSTANCES when possible** - Avoid ExternalView unless you need resource/partition filtering +3. **Specify exact resource names** - Avoid wildcards when using EXTERNALVIEW +4. **Test at scale** - Performance degrades non-linearly with resource count + +#### Example: Bootstrap Replica from Peers + +```java +ClusterMessagingService messagingService = manager.getMessagingService(); + +// Construct the message +Message requestBackupUriRequest = new Message( + MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString()); +requestBackupUriRequest + .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL); +requestBackupUriRequest.setMsgState(MessageState.NEW); + +// Set recipient criteria to find all replicas of specific partition +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("%"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.EXTERNALVIEW); // Need resource-level info +recipientCriteria.setResource("MyDB"); // IMPORTANT: Specify exact resource, not "%" +recipientCriteria.setPartition("MyDB_0"); // Target specific partition +recipientCriteria.setSessionSpecific(true); + +// Wait for 30 seconds +int timeout = 30000; + +// Handler invoked when any recipient responds +BootstrapReplyHandler responseHandler = new BootstrapReplyHandler(); + +// Returns only after all recipients respond or after timeout +int sentMessageCount = messagingService.sendAndWait(recipientCriteria, + requestBackupUriRequest, responseHandler, timeout); ``` See HelixManager.getMessagingService for more info. diff --git a/website/1.4.3/src/site/markdown/tutorial_messaging.md b/website/1.4.3/src/site/markdown/tutorial_messaging.md index 68135762bc..d9270dc8b6 100644 --- a/website/1.4.3/src/site/markdown/tutorial_messaging.md +++ b/website/1.4.3/src/site/markdown/tutorial_messaging.md @@ -25,6 +25,26 @@ under the License. In this chapter, we\'ll learn about messaging, a convenient feature in Helix for sending messages between nodes of a cluster. This is an interesting feature that is quite useful in practice. It is common that nodes in a distributed system require a mechanism to interact with each other. +### Performance Considerations + +**IMPORTANT:** When using the messaging API with `Criteria`, be aware of the following performance characteristics: + +- **ExternalView Scanning:** By default, the messaging service uses `DataSource.EXTERNALVIEW` to resolve criteria. This can scan **all** ExternalView znodes in the cluster, even when targeting specific instances. At high resource cardinality (thousands of resources), this can cause severe performance degradation. + +**Recommended Patterns:** + +- **Use `DataSource.LIVEINSTANCES`** when you only need to target live instances and do not require resource/partition-level filtering. This is much faster and more efficient. +- **Specify exact resource names** instead of wildcards if you must use ExternalView scanning. + +Example of efficient messaging: +```java +Criteria recipientCriteria = new Criteria(); +recipientCriteria.setInstanceName("instance123"); +recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); +recipientCriteria.setDataSource(DataSource.LIVEINSTANCES); // Efficient: avoids EV scan +recipientCriteria.setSessionSpecific(true); +``` + ### Example: Bootstrapping a Replica Consider a search system where the index replica starts up and it does not have an index. A typical solution is to get the index from a common location, or to copy the index from another replica. From def0fade3a638cafa7529de041ae4683a5ab6671 Mon Sep 17 00:00:00 2001 From: Pranavi Ancha <99163324+PranaviAncha@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:35:07 +0530 Subject: [PATCH 2/4] Apply suggestions from code review --- website/1.4.3/src/site/markdown/tutorial_messaging.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/1.4.3/src/site/markdown/tutorial_messaging.md b/website/1.4.3/src/site/markdown/tutorial_messaging.md index d9270dc8b6..1bc2a20a9d 100644 --- a/website/1.4.3/src/site/markdown/tutorial_messaging.md +++ b/website/1.4.3/src/site/markdown/tutorial_messaging.md @@ -29,7 +29,7 @@ In this chapter, we\'ll learn about messaging, a convenient feature in Helix for **IMPORTANT:** When using the messaging API with `Criteria`, be aware of the following performance characteristics: -- **ExternalView Scanning:** By default, the messaging service uses `DataSource.EXTERNALVIEW` to resolve criteria. This can scan **all** ExternalView znodes in the cluster, even when targeting specific instances. At high resource cardinality (thousands of resources), this can cause severe performance degradation. +- **ExternalView Scanning:** By default, the messaging service uses `DataSource.EXTERNALVIEW` to resolve criteria. This can scan **all** ExternalView znodes in the cluster, even when targeting specific instances. At high resource cardinality, this can cause severe performance degradation. **Recommended Patterns:** @@ -41,7 +41,7 @@ Example of efficient messaging: Criteria recipientCriteria = new Criteria(); recipientCriteria.setInstanceName("instance123"); recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT); -recipientCriteria.setDataSource(DataSource.LIVEINSTANCES); // Efficient: avoids EV scan +recipientCriteria.setDataSource(DataSource.LIVEINSTANCES); recipientCriteria.setSessionSpecific(true); ``` From 4d02cf359420c330ca89106f7f04864de29577af Mon Sep 17 00:00:00 2001 From: Pranavi Ancha Date: Fri, 5 Dec 2025 15:46:27 +0530 Subject: [PATCH 3/4] Document performance characteristics and safe usage patterns for messaging API --- .../apache/helix/ClusterMessagingService.java | 27 +----- .../main/java/org/apache/helix/Criteria.java | 84 +++++-------------- .../java/org/apache/helix/HelixManager.java | 2 + .../helix/messaging/CriteriaEvaluator.java | 2 +- .../apache/helix/messaging/package-info.java | 6 +- website/0.9.9/src/site/markdown/Features.md | 61 ++------------ .../src/site/markdown/tutorial_messaging.md | 2 +- website/1.3.2/src/site/markdown/Features.md | 61 ++------------ .../src/site/markdown/tutorial_messaging.md | 2 +- website/1.4.3/src/site/markdown/Features.md | 61 ++------------ 10 files changed, 57 insertions(+), 251 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java index e44d40392f..09c82b2fa2 100644 --- a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java +++ b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java @@ -40,7 +40,7 @@ public interface ClusterMessagingService { * *

PERFORMANCE WARNING: When recipientCriteria uses {@link DataSource#EXTERNALVIEW} * with wildcard or unspecified resource names, this scans ALL ExternalView znodes in the cluster, - * regardless of other criteria like instanceName. At scale (thousands of resources), this causes + * regardless of other criteria like instanceName. At scale, this causes * severe performance degradation. Use {@link DataSource#LIVEINSTANCES} when you don't need * resource/partition filtering, or specify exact resource names when using EXTERNALVIEW. * @@ -62,10 +62,6 @@ public interface ClusterMessagingService { * This is useful when message need to be sent and current thread need not * wait for response since processing will be done in another thread. * - *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. - * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes - * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. - * * @see #send(Criteria, Message) * @param recipientCriteria * @param message @@ -76,10 +72,6 @@ public interface ClusterMessagingService { int send(Criteria recipientCriteria, Message message, AsyncCallback callbackOnReply, int timeOut); /** - *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. - * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes - * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. - * * @see #send(Criteria, Message, AsyncCallback, int) * @param recipientCriteria * @param message @@ -102,11 +94,7 @@ int send(Criteria recipientCriteria, Message message, AsyncCallback callbackOnRe * The current thread can use callbackOnReply instance to store application * specific data. * - *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. - * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes - * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. - * - * @see #send(Criteria, Message, AsyncCallback, int) + * @see #send(Criteria, Message) * @param recipientCriteria * @param message * @param callbackOnReply @@ -117,11 +105,7 @@ int sendAndWait(Criteria recipientCriteria, Message message, AsyncCallback callb int timeOut); /** - *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. - * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes - * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. - * - * @see #send(Criteria, Message, AsyncCallback, int, int) + * @see #send(Criteria, Message) * @param receipientCriteria * @param message * @param callbackOnReply @@ -169,10 +153,7 @@ int sendAndWait(Criteria receipientCriteria, Message message, AsyncCallback call * This will generate all messages to be sent given the recipientCriteria and MessageTemplate, * the messages are not sent. * - *

PERFORMANCE WARNING: See performance considerations in {@link #send(Criteria, Message)}. - * Using {@link DataSource#EXTERNALVIEW} with wildcard resources can scan all ExternalView znodes - * and cause severe performance issues at scale. Prefer {@link DataSource#LIVEINSTANCES} when possible. - * + * @see #send(Criteria, Message) * @param recipientCriteria criteria to be met, defined as {@link Criteria} * @param messageTemplate the Message on which to base the messages to send * @return messages to be sent, grouped by the type of instance to send the message to diff --git a/helix-core/src/main/java/org/apache/helix/Criteria.java b/helix-core/src/main/java/org/apache/helix/Criteria.java index 61d495721c..02e1117bed 100644 --- a/helix-core/src/main/java/org/apache/helix/Criteria.java +++ b/helix-core/src/main/java/org/apache/helix/Criteria.java @@ -22,75 +22,46 @@ /** * Specifies recipient criteria for message delivery in a Helix cluster. * - *

The {@link Criteria} object defines which instances should receive a message by specifying - * attributes like instance name, resource, partition, and state. The most critical configuration - * is {@link DataSource}, which determines where Helix looks up cluster state to resolve recipients. - * *

PERFORMANCE WARNING: Using {@link DataSource#EXTERNALVIEW} with wildcard or unspecified * resource names causes Helix to scan ALL ExternalView znodes in the cluster, regardless of other - * criteria fields. At scale (thousands of resources), this causes severe performance degradation. + * criteria fields. At scale, this causes severe performance degradation. * - *

Quick Start - Common Patterns: + *

Example - Efficient Pattern: *

- * // Pattern 1: Send to specific live instance (most efficient)
+ * // GOOD: Target specific live instance
  * Criteria criteria = new Criteria();
  * criteria.setInstanceName("host_1234");
  * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
- * criteria.setDataSource(DataSource.LIVEINSTANCES);
+ * criteria.setDataSource(DataSource.LIVEINSTANCES);  // Fast
  * criteria.setSessionSpecific(true);
  * 
- * // Pattern 2: Send to all replicas of a specific partition
+ * // BAD: Wildcard resource with ExternalView
  * Criteria criteria = new Criteria();
- * criteria.setInstanceName("%");
- * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+ * criteria.setInstanceName("host_1234");
  * criteria.setDataSource(DataSource.EXTERNALVIEW);
- * criteria.setResource("MyDatabase");  // IMPORTANT: Specify exact resource name
- * criteria.setPartition("MyDatabase_5");
- * criteria.setSessionSpecific(true);
- * 
- * // Pattern 3: Broadcast to all live instances
- * Criteria criteria = new Criteria();
- * criteria.setInstanceName("%");
- * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
- * criteria.setDataSource(DataSource.LIVEINSTANCES);
- * criteria.setSessionSpecific(true);
+ * criteria.setResource("%");  // Scans ALL ExternalViews!
  * 
* - *

DataSource Selection Guide: + *

DataSource Selection: *

* * @see ClusterMessagingService#send(Criteria, org.apache.helix.model.Message) - * @see org.apache.helix.messaging.CriteriaEvaluator */ public class Criteria { /** * Specifies the source of cluster state information for resolving message recipients. * - *

The DataSource determines which ZooKeeper znodes Helix reads to match the criteria: - *

- * - *

Performance Impact: LIVEINSTANCES is fastest as it reads minimal data. EXTERNALVIEW - * and IDEALSTATES can be slow at scale if wildcards are used in resource names, as Helix must - * read and deserialize all resource znodes to match the criteria. + *

LIVEINSTANCES: Reads /LIVEINSTANCES - currently connected instances only. Fastest.
+ * EXTERNALVIEW: Reads /EXTERNALVIEWS/[resource] - actual replica placement/states. + * Wildcard resource names scan ALL resources.
+ * INSTANCES: Reads /INSTANCES/[instance] - instance configuration.
+ * IDEALSTATES: Reads /IDEALSTATES/[resource] - desired replica placement. */ public enum DataSource { IDEALSTATES, @@ -151,13 +122,8 @@ public DataSource getDataSource() { /** * Set the current source of truth for resolving message recipients. * - *

PERFORMANCE GUIDANCE: - *

+ *

Prefer {@link DataSource#LIVEINSTANCES} when not filtering by resource/partition. + * If using {@link DataSource#EXTERNALVIEW}, specify exact resource names to avoid full scans. * * @param source ideal state, external view, live instances, or instances */ @@ -241,14 +207,10 @@ public String getResource() { /** * Set the destination resource name. * - *

Note: This field is only meaningful when using {@link DataSource#EXTERNALVIEW} or - * {@link DataSource#IDEALSTATES}. It is ignored for LIVEINSTANCES and INSTANCES. - * - *

PERFORMANCE: When using EXTERNALVIEW, specifying an exact resource name (e.g., "MyDatabase") - * reads only that resource's ExternalView znode. Using wildcard "%" reads ALL ExternalView znodes - * in the cluster, which can cause severe performance issues at scale. + *

Only meaningful for {@link DataSource#EXTERNALVIEW} or {@link DataSource#IDEALSTATES}. + * Using wildcard "%" with EXTERNALVIEW reads ALL ExternalView znodes - use exact names instead. * - * @param resourceName the exact resource name, or "%" for all resources (avoid wildcard at scale) + * @param resourceName the exact resource name, or "%" for all resources */ public void setResource(String resourceName) { this.resourceName = resourceName; diff --git a/helix-core/src/main/java/org/apache/helix/HelixManager.java b/helix-core/src/main/java/org/apache/helix/HelixManager.java index c1d2ad18c5..cf3378314c 100644 --- a/helix-core/src/main/java/org/apache/helix/HelixManager.java +++ b/helix-core/src/main/java/org/apache/helix/HelixManager.java @@ -409,6 +409,8 @@ void addExternalViewChangeListener(org.apache.helix.ExternalViewChangeListener l /** * Messaging service which can be used to send cluster wide messages. + * See {@link ClusterMessagingService#send(Criteria, org.apache.helix.model.Message)} for usage. + * * @return messaging service */ ClusterMessagingService getMessagingService(); diff --git a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java index 2e56b26af7..5ef64f4c8f 100644 --- a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java +++ b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java @@ -47,7 +47,7 @@ * (e.g., "%" or "*"). This scanning happens even when targeting specific instances, and is * NOT automatically optimized based on other criteria fields (like instanceName). * - *

At high ExternalView cardinality (thousands of resources), this can cause severe performance degradation. + *

At high ExternalView cardinality, this can cause severe performance degradation. * *

Safer Patterns: *