-
Notifications
You must be signed in to change notification settings - Fork 317
Fix flaky connection pool tests for FIFO ordering #3751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -281,7 +281,6 @@ out DbConnectionInternal? recycledConnection | |
| } | ||
|
|
||
| [Fact] | ||
| [ActiveIssue("https://github.com/dotnet/SqlClient/issues/3730")] | ||
| public async Task GetConnectionMaxPoolSize_ShouldRespectOrderOfRequest() | ||
| { | ||
| // Arrange | ||
|
|
@@ -308,29 +307,33 @@ out DbConnectionInternal? internalConnection | |
| Assert.NotNull(internalConnection); | ||
| } | ||
|
|
||
| // Use ManualResetEventSlim to synchronize the tasks | ||
| // and force the request queueing order. | ||
| using ManualResetEventSlim mresQueueOrder = new(); | ||
| using CountdownEvent allRequestsQueued = new(2); | ||
| // Use TaskCompletionSource for coordination to avoid mixing async/await with native synchronization | ||
| TaskCompletionSource<bool> firstTaskReady = new(); | ||
| TaskCompletionSource<bool> secondTaskReady = new(); | ||
| TaskCompletionSource<bool> startRequests = new(); | ||
|
|
||
| // Act | ||
| var recycledTask = Task.Run(() => | ||
| var recycledTask = Task.Run(async () => | ||
| { | ||
| mresQueueOrder.Set(); | ||
| allRequestsQueued.Signal(); | ||
| firstTaskReady.SetResult(true); | ||
| await startRequests.Task; | ||
| pool.TryGetConnection( | ||
| new SqlConnection(""), | ||
| new SqlConnection("Timeout=5000"), | ||
| null, | ||
| new DbConnectionOptions("", null), | ||
| out DbConnectionInternal? recycledConnection | ||
| ); | ||
| return recycledConnection; | ||
| }); | ||
| var failedTask = Task.Run(() => | ||
|
|
||
| var failedTask = Task.Run(async () => | ||
| { | ||
| // Force this request to be second in the queue. | ||
| mresQueueOrder.Wait(); | ||
| allRequestsQueued.Signal(); | ||
| secondTaskReady.SetResult(true); | ||
| await startRequests.Task; | ||
| // Add a small delay to ensure this request comes after the first. | ||
| // This is necessary because the channel-based pool queues requests in FIFO order, | ||
| // and we need to guarantee the order for this test to be deterministic. | ||
| await Task.Delay(50); | ||
|
||
| pool.TryGetConnection( | ||
| new SqlConnection("Timeout=1"), | ||
| null, | ||
|
|
@@ -340,7 +343,22 @@ out DbConnectionInternal? failedConnection | |
| return failedConnection; | ||
| }); | ||
|
|
||
| allRequestsQueued.Wait(); | ||
| // Wait for both tasks to be ready before starting the requests | ||
| await firstTaskReady.Task; | ||
| await secondTaskReady.Task; | ||
|
|
||
| // Allow both tasks to reach their wait state before proceeding | ||
| await Task.Delay(100); | ||
|
|
||
| // Start both requests | ||
| startRequests.SetResult(true); | ||
|
|
||
| // Give time for both requests to be queued. | ||
| // This delay ensures that both TryGetConnection calls have been made and are waiting in the channel | ||
| // before we return the connection, which is necessary to test FIFO ordering. | ||
| await Task.Delay(200); | ||
|
|
||
| // Return the connection which should satisfy the first queued request | ||
| pool.ReturnInternalConnection(firstConnection!, firstOwningConnection); | ||
| var recycledConnection = await recycledTask; | ||
|
|
||
|
|
@@ -350,7 +368,6 @@ out DbConnectionInternal? failedConnection | |
| } | ||
|
|
||
| [Fact] | ||
| [ActiveIssue("https://github.com/dotnet/SqlClient/issues/3730")] | ||
| public async Task GetConnectionAsyncMaxPoolSize_ShouldRespectOrderOfRequest() | ||
| { | ||
| // Arrange | ||
|
|
@@ -382,14 +399,16 @@ out DbConnectionInternal? internalConnection | |
|
|
||
| // Act | ||
| var exceeded = pool.TryGetConnection( | ||
| new SqlConnection(""), | ||
| new SqlConnection("Timeout=5000"), | ||
| recycledTaskCompletionSource, | ||
| new DbConnectionOptions("", null), | ||
| out DbConnectionInternal? recycledConnection | ||
| ); | ||
|
|
||
| // Gives time for the recycled connection to be queued before the failed request is initiated. | ||
| await Task.Delay(1000); | ||
| // Ensure sufficient time for the recycled connection request to be fully queued. | ||
| // This delay is necessary because the channel-based pool queues async requests, | ||
| // and we need to guarantee the first request is in the queue before the second one. | ||
| await Task.Delay(200); | ||
|
|
||
| var exceeded2 = pool.TryGetConnection( | ||
| new SqlConnection("Timeout=1"), | ||
|
|
@@ -398,6 +417,10 @@ out DbConnectionInternal? recycledConnection | |
| out DbConnectionInternal? failedConnection | ||
| ); | ||
|
|
||
| // Ensure the second request is also queued before returning the connection. | ||
| // This guarantees that both requests are waiting in FIFO order. | ||
| await Task.Delay(100); | ||
|
|
||
| pool.ReturnInternalConnection(firstConnection!, firstOwningConnection); | ||
| recycledConnection = await recycledTaskCompletionSource.Task; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think making these async introduced a deadlock. Under some conditions, they'll hang on to threads and prevent future async operations from going through. I'm going to revert these changes, other than the SpinWait -> Thread.Sleep() change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be nice to get a better understanding of why the task completion source causes deadlocks in this case. I feel like this might be masking a bigger issue, or a lack of understanding of how the mechanisms actually work.