Skip to content

Commit 8317760

Browse files
authored
Merge pull request #162 from abicky/integrate-with-opentelemetry
Integrate with OpenTelemetry
2 parents 8038f3e + ac09998 commit 8317760

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+2026
-282
lines changed

.github/workflows/test.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ jobs:
5656
needs:
5757
- relayer-build
5858
- tendermint-build
59+
strategy:
60+
matrix:
61+
enable-telemetry:
62+
- 'false'
63+
- 'true'
64+
env:
65+
YRLY_ENABLE_TELEMETRY: ${{ matrix.enable-telemetry }}
5966
steps:
6067
- uses: actions/checkout@v4
6168
- name: Restore relayer binary cache
@@ -87,6 +94,13 @@ jobs:
8794
needs:
8895
- relayer-build
8996
- tendermint-build
97+
strategy:
98+
matrix:
99+
enable-telemetry:
100+
- 'false'
101+
- 'true'
102+
env:
103+
YRLY_ENABLE_TELEMETRY: ${{ matrix.enable-telemetry }}
90104
steps:
91105
- uses: actions/checkout@v4
92106
- name: Restore relayer binary cache

README.md

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,108 @@ You can use it by specifying the package name of the proto definition correspond
103103
}
104104
}
105105
```
106+
107+
## OpenTelemetry integration
108+
109+
OpenTelemetry integration can be enabled by specifying the `--enable-telemetry` flag or by setting `YRLY_ENABLE_TELEMETRY` environment variable to true.
110+
To see an example setup, refer to [examples/opentelemetry-integration](examples/opentelemetry-integration).
111+
112+
### Configurations
113+
114+
You can configure its behavior using environment variables supported by the Go SDK, as listed in the [Compliance of Implementations with Specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/spec-compliance-matrix.md#environment-variables).
115+
116+
In addition to these environment variables, yui-relayer supports the following variables, which are not available in the Go SDK:
117+
118+
* OTEL_PROPAGATORS
119+
* OTEL_TRACES_EXPORTER
120+
- Note that `"zipkin"` is not supported
121+
* OTEL_METRICS_EXPORTER
122+
* OTEL_LOGS_EXPORTER
123+
* OTEL_EXPORTER_PROMETHEUS_HOST
124+
* OTEL_EXPORTER_PROMETHEUS_PORT
125+
* OTEL_EXPORTER_CONSOLE_TRACES_WRITER
126+
* OTEL_EXPORTER_CONSOLE_LOGS_WRITER
127+
* OTEL_EXPORTER_CONSOLE_METRICS_WRITER
128+
129+
The `OTEL_EXPORTER_CONSOLE_*_WRITER` variables are specific to yui-relayer and allow you to change the output destination of the standard output exporters. To redirect output to standard error, set the value to `stderr`.
130+
131+
For more information about OpenTelemetry environment variables, refer to the [OpenTelemetry Environment Variable Specification](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables).
132+
133+
134+
When OpenTelemetry integration is enabled, the OTLP log exporter is enabled by default and you may want to disable ordinal logs.
135+
In this case, you can disable them by setting `.global.logger.output` to `"null"` in the yui-relayer configuration file.
136+
137+
### Add spans and span attributes in external modules
138+
139+
#### Using tracing bridges
140+
141+
The Relayer provides OpenTelemetry tracing bridges: `otelcore.Chain` and `otelcore.Prover`.
142+
These bridges add tracing to the primary methods defined in the Chain and Prover interfaces.
143+
You can use the tracing bridges by returning them in `ChainConfig.Build` and `ProverConfig.Build`:
144+
145+
```go
146+
var tracer = otel.Tracer("example.com/my-module")
147+
148+
func (c ChainConfig) Build() (core.Chain, error) {
149+
chain := buildChainFromConfig(c)
150+
return otelcore.NewChain(chain, tracer), nil
151+
}
152+
153+
func (c ProverConfig) Build(chain core.Chain) (core.Prover, error) {
154+
prover := buildProverFromConfig(c)
155+
return otelcore.NewProver(prover, chain.ChainID(), tracer), nil
156+
}
157+
```
158+
159+
If you need to access the original Chain and Prover implementations, you can use `coreutil.UnwrapChain` and `coreutil.UnwrapProver`:
160+
161+
```go
162+
// The case where a ProvableChain contains a chain struct (module.Chain)
163+
chain, err := coreutil.UnwrapChain[module.Chain](provableChain)
164+
165+
// The case where a ProvableChain contains a chain struct pointer (*module.Chain)
166+
chainPtr, err := coreutil.UnwrapChain[*module.Chain](provableChain)
167+
```
168+
169+
Note that, if you call methods defined in your Chain module and Prover module directly, tracing data will not be recorded.
170+
171+
#### Manual tracing
172+
173+
In addition to using the tracing bridges, you can manually create spans when needed:
174+
175+
```go
176+
var tracer = otel.Tracer("example.com/my-module")
177+
178+
func someFunction(ctx context.Context) {
179+
ctx, span := tracer.Start(ctx, "someFunction")
180+
defer span.End()
181+
182+
// -- snip --
183+
}
184+
```
185+
186+
If a function or method receives a `core.QueryContext`, you can use `core.StartTraceWithQueryContext` to create a span:
187+
188+
```go
189+
func (c *Chain) QuerySomething(ctx core.QueryContext) (any, error) {
190+
ctx, span := core.StartTraceWithQueryContext(tracer, ctx, "Chain.QuerySomething", core.WithChainAttributes(c.ChainID()))
191+
defer span.End()
192+
193+
// -- snip --
194+
```
195+
196+
You can also add span attributes as follows:
197+
198+
```go
199+
func (c *Chain) GetMsgResult(ctx context.Context, id core.MsgID) (core.MsgResult, error) {
200+
msgID, ok := id.(*MsgID)
201+
if !ok {
202+
return nil, fmt.Errorf("unexpected message id type: %T", id)
203+
}
204+
205+
span := trace.SpanFromContext(ctx)
206+
span.SetAttributes(semconv.TxHashKey.String(msgID.TxHash))
207+
208+
// -- snip --
209+
}
210+
```

chains/tendermint/chain.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ import (
1212

1313
"cosmossdk.io/errors"
1414
"github.com/avast/retry-go"
15+
"github.com/hyperledger-labs/yui-relayer/otelcore/semconv"
16+
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
17+
"go.opentelemetry.io/otel/codes"
18+
"go.opentelemetry.io/otel/trace"
1519

1620
rpcclient "github.com/cometbft/cometbft/rpc/client"
1721
rpchttp "github.com/cometbft/cometbft/rpc/client/http"
@@ -182,6 +186,7 @@ func (c *Chain) sendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxResponse,
182186
// CheckTx failed
183187
return nil, fmt.Errorf("CheckTx failed: %v", errors.ABCIError(res.Codespace, res.Code, res.RawLog))
184188
}
189+
trace.SpanFromContext(ctx).SetAttributes(semconv.TxHashKey.String(res.TxHash))
185190

186191
// wait for tx being committed
187192
if resTx, err := c.waitForCommit(ctx, res.TxHash); err != nil {
@@ -203,6 +208,9 @@ func (c *Chain) sendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxResponse,
203208
}
204209

205210
func (c *Chain) rawSendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxResponse, bool, error) {
211+
ctx, span := tracer.Start(ctx, "Chain.rawSendMsgs", core.WithChainAttributes(c.ChainID()))
212+
defer span.End()
213+
206214
// Instantiate the client context
207215
// NOTE: Although cosmos-sdk does not currently use CmdContext in Context.QueryWithData,
208216
// set ctx to clientCtx in case cosmos-sdk uses it in the future.
@@ -212,6 +220,7 @@ func (c *Chain) rawSendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxRespons
212220
// Query account details
213221
txf, err := prepareFactory(clientCtx, c.TxFactory(0))
214222
if err != nil {
223+
span.SetStatus(codes.Error, err.Error())
215224
return nil, false, err
216225
}
217226

@@ -220,6 +229,7 @@ func (c *Chain) rawSendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxRespons
220229
// If users pass gas adjustment, then calculate gas
221230
_, adjusted, err := CalculateGas(clientCtx.QueryWithData, txf, msgs...)
222231
if err != nil {
232+
span.SetStatus(codes.Error, err.Error())
223233
return nil, false, err
224234
}
225235

@@ -229,35 +239,41 @@ func (c *Chain) rawSendMsgs(ctx context.Context, msgs []sdk.Msg) (*sdk.TxRespons
229239
// Build the transaction builder
230240
txb, err := txf.BuildUnsignedTx(msgs...)
231241
if err != nil {
242+
span.SetStatus(codes.Error, err.Error())
232243
return nil, false, err
233244
}
234245

235246
// Attach the signature to the transaction
236247
err = tx.Sign(ctx, txf, c.config.Key, txb, false)
237248
if err != nil {
249+
span.SetStatus(codes.Error, err.Error())
238250
return nil, false, err
239251
}
240252

241253
// Generate the transaction bytes
242254
txBytes, err := clientCtx.TxConfig.TxEncoder()(txb.GetTx())
243255
if err != nil {
256+
span.SetStatus(codes.Error, err.Error())
244257
return nil, false, err
245258
}
246259

247260
// Broadcast those bytes
248261
res, err := clientCtx.BroadcastTx(txBytes)
249262
if err != nil {
263+
span.SetStatus(codes.Error, err.Error())
250264
return nil, false, err
251265
}
252266

253267
// transaction was executed, log the success or failure using the tx response code
254268
// NOTE: error is nil, logic should use the returned error to determine if the
255269
// transaction was successfully executed.
256270
if res.Code != 0 {
271+
span.SetStatus(codes.Error, "non-zero response code")
257272
c.LogFailedTx(res, err, msgs)
258273
return res, false, nil
259274
}
260275

276+
span.SetAttributes(semconv.TxHashKey.String(res.TxHash))
261277
c.LogSuccessTx(res, msgs)
262278
return res, true, nil
263279
}
@@ -429,6 +445,9 @@ func (c *Chain) GetMsgResult(ctx context.Context, id core.MsgID) (core.MsgResult
429445
return nil, fmt.Errorf("unexpected message id type: %T", id)
430446
}
431447

448+
span := trace.SpanFromContext(ctx)
449+
span.SetAttributes(semconv.TxHashKey.String(msgID.TxHash))
450+
432451
// find tx
433452
resTx, err := c.waitForCommit(ctx, msgID.TxHash)
434453
if err != nil {
@@ -558,6 +577,10 @@ func newRPCClient(addr string, timeout time.Duration) (*rpchttp.HTTP, error) {
558577
return nil, err
559578
}
560579

580+
// NOTE: The Cosmos SDK typically does not propagate parent contexts when making JSON-RPC calls,
581+
// so most spans recorded by otelhttp do not have a parent span.
582+
// cf. https://github.com/cosmos/cosmos-sdk/issues/24500
583+
httpClient.Transport = otelhttp.NewTransport(httpClient.Transport)
561584
httpClient.Timeout = timeout
562585
rpcClient, err := rpchttp.NewWithClient(addr, "/websocket", httpClient)
563586
if err != nil {

chains/tendermint/cmd/keys.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/cosmos/cosmos-sdk/crypto/hd"
88
"github.com/hyperledger-labs/yui-relayer/chains/tendermint"
99
"github.com/hyperledger-labs/yui-relayer/config"
10+
"github.com/hyperledger-labs/yui-relayer/coreutil"
1011
"github.com/spf13/cobra"
1112
)
1213

@@ -40,7 +41,11 @@ func keysAddCmd(ctx *config.Context) *cobra.Command {
4041
if err != nil {
4142
return err
4243
}
43-
chain := c.Chain.(*tendermint.Chain)
44+
45+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
46+
if err != nil {
47+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
48+
}
4449

4550
var keyName string
4651
if len(args) == 2 {
@@ -100,7 +105,11 @@ func keysRestoreCmd(ctx *config.Context) *cobra.Command {
100105
if err != nil {
101106
return err
102107
}
103-
chain := c.Chain.(*tendermint.Chain)
108+
109+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
110+
if err != nil {
111+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
112+
}
104113

105114
if chain.KeyExists(keyName) {
106115
return errKeyExists(keyName)
@@ -136,7 +145,11 @@ func keysShowCmd(ctx *config.Context) *cobra.Command {
136145
if err != nil {
137146
return err
138147
}
139-
chain := c.Chain.(*tendermint.Chain)
148+
149+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
150+
if err != nil {
151+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
152+
}
140153

141154
var keyName string
142155
if len(args) == 2 {
@@ -177,7 +190,11 @@ func keysListCmd(ctx *config.Context) *cobra.Command {
177190
if err != nil {
178191
return err
179192
}
180-
chain := c.Chain.(*tendermint.Chain)
193+
194+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
195+
if err != nil {
196+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
197+
}
181198

182199
info, err := chain.Keybase.List()
183200
if err != nil {

chains/tendermint/cmd/light.go

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
tmclient "github.com/cosmos/ibc-go/v8/modules/light-clients/07-tendermint"
99
"github.com/hyperledger-labs/yui-relayer/chains/tendermint"
1010
"github.com/hyperledger-labs/yui-relayer/config"
11+
"github.com/hyperledger-labs/yui-relayer/coreutil"
1112
"github.com/spf13/cobra"
1213
)
1314

@@ -40,8 +41,15 @@ func initLightCmd(ctx *config.Context) *cobra.Command {
4041
if err != nil {
4142
return err
4243
}
43-
chain := c.Chain.(*tendermint.Chain)
44-
prover := c.Prover.(*tendermint.Prover)
44+
45+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
46+
if err != nil {
47+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
48+
}
49+
prover, err := coreutil.UnwrapProver[*tendermint.Prover](c)
50+
if err != nil {
51+
return fmt.Errorf("Chain %q is not a tendermint.Prover", args[0])
52+
}
4553

4654
db, df, err := prover.NewLightDB(cmd.Context())
4755
if err != nil {
@@ -96,7 +104,11 @@ func updateLightCmd(ctx *config.Context) *cobra.Command {
96104
if err != nil {
97105
return err
98106
}
99-
prover := c.Prover.(*tendermint.Prover)
107+
108+
prover, err := coreutil.UnwrapProver[*tendermint.Prover](c)
109+
if err != nil {
110+
return fmt.Errorf("Chain %q is not a tendermint.Prover", args[0])
111+
}
100112

101113
bh, err := prover.GetLatestLightHeader(cmd.Context())
102114
if err != nil {
@@ -129,8 +141,15 @@ func lightHeaderCmd(ctx *config.Context) *cobra.Command {
129141
if err != nil {
130142
return err
131143
}
132-
chain := c.Chain.(*tendermint.Chain)
133-
prover := c.Prover.(*tendermint.Prover)
144+
145+
chain, err := coreutil.UnwrapChain[*tendermint.Chain](c)
146+
if err != nil {
147+
return fmt.Errorf("Chain %q is not a tendermint.Chain", args[0])
148+
}
149+
prover, err := coreutil.UnwrapProver[*tendermint.Prover](c)
150+
if err != nil {
151+
return fmt.Errorf("Chain %q is not a tendermint.Prover", args[0])
152+
}
134153

135154
var header *tmclient.Header
136155

@@ -188,7 +207,11 @@ func deleteLightCmd(ctx *config.Context) *cobra.Command {
188207
if err != nil {
189208
return err
190209
}
191-
prover := c.Prover.(*tendermint.Prover)
210+
211+
prover, err := coreutil.UnwrapProver[*tendermint.Prover](c)
212+
if err != nil {
213+
return fmt.Errorf("Chain %q is not a tendermint.Prover", args[0])
214+
}
192215

193216
err = prover.DeleteLightDB()
194217
if err != nil {

0 commit comments

Comments
 (0)