Skip to content

Commit 17f361b

Browse files
committed
Failsafe RetryPolicy instrumentation added
1 parent 603b737 commit 17f361b

File tree

3 files changed

+191
-0
lines changed

3 files changed

+191
-0
lines changed

instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,25 @@
1313

1414
import dev.failsafe.CircuitBreaker;
1515
import dev.failsafe.CircuitBreakerConfig;
16+
import dev.failsafe.RetryPolicy;
17+
import dev.failsafe.RetryPolicyConfig;
1618
import io.opentelemetry.api.OpenTelemetry;
1719
import io.opentelemetry.api.common.AttributeKey;
1820
import io.opentelemetry.api.common.Attributes;
1921
import io.opentelemetry.api.metrics.LongCounter;
22+
import io.opentelemetry.api.metrics.LongHistogram;
2023
import io.opentelemetry.api.metrics.Meter;
24+
import java.util.stream.Collectors;
25+
import java.util.stream.LongStream;
2126

2227
/** Entrypoint for instrumenting Failsafe components. */
2328
public final class FailsafeTelemetry {
2429
private static final String INSTRUMENTATION_NAME = "io.opentelemetry.failsafe-3.0";
2530

2631
private static final AttributeKey<String> CIRCUIT_BREAKER_NAME =
2732
AttributeKey.stringKey("failsafe.circuit_breaker.name");
33+
private static final AttributeKey<String> RETRY_POLICY_NAME =
34+
AttributeKey.stringKey("failsafe.retry_policy.name");
2835

2936
/** Returns a new {@link FailsafeTelemetry} configured with the given {@link OpenTelemetry}. */
3037
public static FailsafeTelemetry create(OpenTelemetry openTelemetry) {
@@ -70,4 +77,44 @@ public <R> CircuitBreaker<R> createCircuitBreaker(
7077
.onClose(buildInstrumentedCloseListener(userConfig, stateChangesCounter, attributes))
7178
.build();
7279
}
80+
81+
/**
82+
* Returns an instrumented {@link RetryPolicy} by given values.
83+
*
84+
* @param delegate user configured {@link RetryPolicy} to be instrumented
85+
* @param retryPolicyName identifier of given {@link RetryPolicy}
86+
* @param <R> {@link RetryPolicy}'s result type
87+
* @return instrumented {@link RetryPolicy}
88+
*/
89+
public <R> RetryPolicy<R> createRetryPolicy(RetryPolicy<R> delegate, String retryPolicyName) {
90+
RetryPolicyConfig<R> userConfig = delegate.getConfig();
91+
Meter meter = openTelemetry.getMeter(INSTRUMENTATION_NAME);
92+
LongCounter executionCounter =
93+
meter
94+
.counterBuilder("failsafe.retry_policy.execution.count")
95+
.setDescription(
96+
"Count of execution events processed by the retry policy. "
97+
+ "Each event represents one complete execution flow (initial attempt + any retries). "
98+
+ "This metric does not count individual retry attempts - it counts each time the policy is invoked.")
99+
.build();
100+
LongHistogram attemptsHistogram =
101+
meter
102+
.histogramBuilder("failsafe.retry_policy.attempts")
103+
.setDescription("Histogram of number of attempts for each execution.")
104+
.ofLongs()
105+
.setExplicitBucketBoundariesAdvice(
106+
LongStream.range(1, userConfig.getMaxAttempts() + 1)
107+
.boxed()
108+
.collect(Collectors.toList()))
109+
.build();
110+
Attributes attributes = Attributes.of(RETRY_POLICY_NAME, retryPolicyName);
111+
return RetryPolicy.builder(userConfig)
112+
.onFailure(
113+
RetryPolicyEventListenerBuilders.buildInstrumentedFailureListener(
114+
userConfig, executionCounter, attemptsHistogram, attributes))
115+
.onSuccess(
116+
RetryPolicyEventListenerBuilders.buildInstrumentedSuccessListener(
117+
userConfig, executionCounter, attemptsHistogram, attributes))
118+
.build();
119+
}
73120
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.instrumentation.failsafe.v3_0;
7+
8+
import static io.opentelemetry.api.common.AttributeKey.stringKey;
9+
10+
import dev.failsafe.RetryPolicyConfig;
11+
import dev.failsafe.event.EventListener;
12+
import dev.failsafe.event.ExecutionCompletedEvent;
13+
import io.opentelemetry.api.common.AttributeKey;
14+
import io.opentelemetry.api.common.Attributes;
15+
import io.opentelemetry.api.metrics.LongCounter;
16+
import io.opentelemetry.api.metrics.LongHistogram;
17+
18+
final class RetryPolicyEventListenerBuilders {
19+
private static final AttributeKey<String> OUTCOME_KEY =
20+
stringKey("failsafe.retry_policy.outcome");
21+
22+
private RetryPolicyEventListenerBuilders() {}
23+
24+
static <R> EventListener<ExecutionCompletedEvent<R>> buildInstrumentedFailureListener(
25+
RetryPolicyConfig<R> userConfig,
26+
LongCounter executionCounter,
27+
LongHistogram attemptsHistogram,
28+
Attributes commonAttributes) {
29+
Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "failure").build();
30+
return e -> {
31+
executionCounter.add(1, attributes);
32+
attemptsHistogram.record(e.getAttemptCount(), attributes);
33+
if (userConfig.getFailureListener() != null) {
34+
userConfig.getFailureListener().accept(e);
35+
}
36+
};
37+
}
38+
39+
static <R> EventListener<ExecutionCompletedEvent<R>> buildInstrumentedSuccessListener(
40+
RetryPolicyConfig<R> userConfig,
41+
LongCounter executionCounter,
42+
LongHistogram attemptsHistogram,
43+
Attributes commonAttributes) {
44+
Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "success").build();
45+
return e -> {
46+
executionCounter.add(1, attributes);
47+
attemptsHistogram.record(e.getAttemptCount(), attributes);
48+
if (userConfig.getFailureListener() != null) {
49+
userConfig.getFailureListener().accept(e);
50+
}
51+
};
52+
}
53+
}

instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,20 @@
1111
import dev.failsafe.CircuitBreaker;
1212
import dev.failsafe.CircuitBreakerOpenException;
1313
import dev.failsafe.Failsafe;
14+
import dev.failsafe.RetryPolicy;
1415
import io.opentelemetry.api.common.Attributes;
1516
import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension;
1617
import io.opentelemetry.instrumentation.testing.junit.LibraryInstrumentationExtension;
18+
import io.opentelemetry.sdk.metrics.data.HistogramData;
19+
import io.opentelemetry.sdk.metrics.data.HistogramPointData;
20+
import io.opentelemetry.sdk.metrics.data.LongPointData;
21+
import io.opentelemetry.sdk.metrics.data.SumData;
1722
import io.opentelemetry.sdk.testing.assertj.LongPointAssert;
1823
import java.time.Duration;
24+
import java.util.Arrays;
25+
import java.util.Collection;
1926
import java.util.Objects;
27+
import java.util.concurrent.atomic.AtomicInteger;
2028
import java.util.function.Consumer;
2129
import org.junit.jupiter.api.Test;
2230
import org.junit.jupiter.api.extension.RegisterExtension;
@@ -80,6 +88,82 @@ void captureCircuitBreakerMetrics() {
8088
1, "failsafe.circuit_breaker.state", "closed"))));
8189
}
8290

91+
@Test
92+
void captureRetryPolicyMetrics() {
93+
// given
94+
RetryPolicy<Object> userRetryPolicy =
95+
dev.failsafe.RetryPolicy.builder()
96+
.handleResultIf(Objects::isNull)
97+
.withMaxAttempts(3)
98+
.build();
99+
FailsafeTelemetry failsafeTelemetry = FailsafeTelemetry.create(testing.getOpenTelemetry());
100+
RetryPolicy<Object> instrumentedRetryPolicy =
101+
failsafeTelemetry.createRetryPolicy(userRetryPolicy, "testing");
102+
103+
// when
104+
for (int i = 0; i <= 3; i++) {
105+
int temp = i;
106+
AtomicInteger retry = new AtomicInteger(0);
107+
Failsafe.with(instrumentedRetryPolicy)
108+
.get(
109+
() -> {
110+
if (retry.get() < temp) {
111+
retry.incrementAndGet();
112+
return null;
113+
} else {
114+
return new Object();
115+
}
116+
});
117+
}
118+
119+
// then
120+
testing.waitAndAssertMetrics("io.opentelemetry.failsafe-3.0");
121+
assertThat(testing.metrics().size()).isEqualTo(2);
122+
123+
SumData<LongPointData> executionCountMetric =
124+
testing.metrics().stream()
125+
.filter(m -> m.getName().equals("failsafe.retry_policy.execution.count"))
126+
.findFirst()
127+
.get()
128+
.getLongSumData();
129+
assertThat(executionCountMetric.getPoints().size()).isEqualTo(2);
130+
assertThat(executionCountMetric.getPoints())
131+
.anyMatch(
132+
p ->
133+
p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure"))
134+
&& p.getValue() == 1);
135+
assertThat(executionCountMetric.getPoints())
136+
.anyMatch(
137+
p ->
138+
p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success"))
139+
&& p.getValue() == 3);
140+
141+
HistogramData attemptsMetric =
142+
testing.metrics().stream()
143+
.filter(m -> m.getName().equals("failsafe.retry_policy.attempts"))
144+
.findFirst()
145+
.get()
146+
.getHistogramData();
147+
Collection<HistogramPointData> pointData = attemptsMetric.getPoints();
148+
assertThat(pointData).hasSize(2);
149+
assertThat(pointData)
150+
.anyMatch(
151+
p ->
152+
p.getCount() == 3
153+
&& p.getMin() == 1
154+
&& p.getMax() == 3
155+
&& p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success"))
156+
&& Arrays.equals(p.getCounts().toArray(), new Long[] {1L, 1L, 1L, 0L}));
157+
assertThat(pointData)
158+
.anyMatch(
159+
p ->
160+
p.getCount() == 1
161+
&& p.getMin() == 3
162+
&& p.getMax() == 3
163+
&& p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure"))
164+
&& Arrays.equals(p.getCounts().toArray(), new Long[] {0L, 0L, 1L, 0L}));
165+
}
166+
83167
private static Consumer<LongPointAssert> buildCircuitBreakerAssertion(
84168
long expectedValue, String expectedAttributeKey, String expectedAttributeValue) {
85169
return longSumAssert ->
@@ -94,4 +178,11 @@ private static Consumer<LongPointAssert> buildCircuitBreakerAssertion(
94178
.build(),
95179
attributes));
96180
}
181+
182+
private static Attributes buildExpectedRetryPolicyAttributes(String expectedOutcome) {
183+
return Attributes.builder()
184+
.put("failsafe.retry_policy.name", "testing")
185+
.put("failsafe.retry_policy.outcome", expectedOutcome)
186+
.build();
187+
}
97188
}

0 commit comments

Comments
 (0)