@@ -13,6 +13,7 @@ import (
1313 "github.com/prometheus/client_golang/prometheus"
1414
1515 "github.com/ava-labs/avalanchego/ids"
16+ "github.com/ava-labs/avalanchego/utils/constants"
1617 "github.com/ava-labs/avalanchego/utils/math"
1718 "github.com/ava-labs/avalanchego/utils/wrappers"
1819)
@@ -22,11 +23,12 @@ var (
2223)
2324
2425type adaptiveTimeout struct {
25- index int // Index in the wait queue
26- id ids.ID // Unique ID of this timeout
27- handler func () // Function to execute if timed out
28- duration time.Duration // How long this timeout was set for
29- deadline time.Time // When this timeout should be fired
26+ index int // Index in the wait queue
27+ id ids.ID // Unique ID of this timeout
28+ handler func () // Function to execute if timed out
29+ duration time.Duration // How long this timeout was set for
30+ deadline time.Time // When this timeout should be fired
31+ msgType constants.MsgType // Type of this outstanding request
3032}
3133
3234// A timeoutQueue implements heap.Interface and holds adaptiveTimeouts.
@@ -78,6 +80,7 @@ type AdaptiveTimeoutManager struct {
7880 // Tells the time. Can be faked for testing.
7981 clock Clock
8082 networkTimeoutMetric , avgLatency prometheus.Gauge
83+ numTimeouts prometheus.Counter
8184 // Averages the response time from all peers
8285 averager math.Averager
8386 // Timeout is [timeoutCoefficient] * average response time
@@ -103,6 +106,11 @@ func (tm *AdaptiveTimeoutManager) Initialize(config *AdaptiveTimeoutConfig) erro
103106 Name : "avg_network_latency" ,
104107 Help : "Average network latency in nanoseconds" ,
105108 })
109+ tm .numTimeouts = prometheus .NewCounter (prometheus.CounterOpts {
110+ Namespace : config .MetricsNamespace ,
111+ Name : "request_timeouts" ,
112+ Help : "Number of timed out requests" ,
113+ })
106114
107115 switch {
108116 case config .InitialTimeout > config .MaximumTimeout :
@@ -126,6 +134,7 @@ func (tm *AdaptiveTimeoutManager) Initialize(config *AdaptiveTimeoutConfig) erro
126134 errs := & wrappers.Errs {}
127135 errs .Add (config .Registerer .Register (tm .networkTimeoutMetric ))
128136 errs .Add (config .Registerer .Register (tm .avgLatency ))
137+ errs .Add (config .Registerer .Register (tm .numTimeouts ))
129138 return errs .Err
130139}
131140
@@ -145,14 +154,14 @@ func (tm *AdaptiveTimeoutManager) Stop() { tm.timer.Stop() }
145154// Put registers a timeout for [id]. If the timeout occurs, [timeoutHandler] is called.
146155// Returns the time at which the timeout will fire if it is not first
147156// removed by calling [tm.Remove].
148- func (tm * AdaptiveTimeoutManager ) Put (id ids.ID , timeoutHandler func ()) time.Time {
157+ func (tm * AdaptiveTimeoutManager ) Put (id ids.ID , msgType constants. MsgType , timeoutHandler func ()) time.Time {
149158 tm .lock .Lock ()
150159 defer tm .lock .Unlock ()
151- return tm .put (id , timeoutHandler )
160+ return tm .put (id , msgType , timeoutHandler )
152161}
153162
154163// Assumes [tm.lock] is held
155- func (tm * AdaptiveTimeoutManager ) put (id ids.ID , handler func ()) time.Time {
164+ func (tm * AdaptiveTimeoutManager ) put (id ids.ID , msgType constants. MsgType , handler func ()) time.Time {
156165 currentTime := tm .clock .Time ()
157166 tm .remove (id , currentTime )
158167
@@ -161,6 +170,7 @@ func (tm *AdaptiveTimeoutManager) put(id ids.ID, handler func()) time.Time {
161170 handler : handler ,
162171 duration : tm .currentTimeout ,
163172 deadline : currentTime .Add (tm .currentTimeout ),
173+ msgType : msgType ,
164174 }
165175 tm .timeoutMap [id ] = timeout
166176 heap .Push (& tm .timeoutQueue , timeout )
@@ -185,9 +195,14 @@ func (tm *AdaptiveTimeoutManager) remove(id ids.ID, now time.Time) {
185195 }
186196
187197 // Observe the response time to update average network response time
188- timeoutRegisteredAt := timeout .deadline .Add (- 1 * timeout .duration )
189- latency := now .Sub (timeoutRegisteredAt )
190- tm .observeLatencyAndUpdateTimeout (latency , now )
198+ // Don't include Get requests in calculation, since an adversary
199+ // can cause you to issue a Get request and then cause it to time out,
200+ // increasing your timeout.
201+ if timeout .msgType != constants .GetMsg {
202+ timeoutRegisteredAt := timeout .deadline .Add (- 1 * timeout .duration )
203+ latency := now .Sub (timeoutRegisteredAt )
204+ tm .observeLatencyAndUpdateTimeout (latency , now )
205+ }
191206
192207 // Remove the timeout from the map
193208 delete (tm .timeoutMap , id )
@@ -213,6 +228,7 @@ func (tm *AdaptiveTimeoutManager) timeout() {
213228 if timeoutHandler == nil {
214229 break
215230 }
231+ tm .numTimeouts .Inc ()
216232
217233 // Don't execute a callback with a lock held
218234 tm .lock .Unlock ()
0 commit comments