Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 136 additions & 3 deletions csharp/src/Drivers/BigQuery/BigQueryConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ public BigQueryConnection(IReadOnlyDictionary<string, string> properties) : base

if (this.properties.TryGetValue(BigQueryParameters.DefaultClientLocation, out string? location) &&
!string.IsNullOrEmpty(location) &&
BigQueryConstants.ValidLocations.Any(l => l.Equals(location, StringComparison.OrdinalIgnoreCase)))
(location.Equals(BigQueryConstants.AutoDetectLocation, StringComparison.OrdinalIgnoreCase) ||
BigQueryConstants.ValidLocations.Any(l => l.Equals(location, StringComparison.OrdinalIgnoreCase))))
{
DefaultClientLocation = location;
}
Expand Down Expand Up @@ -142,6 +143,8 @@ private bool TryInitTracerProvider(out FileActivityListener? fileActivityListene
// if this value is null, the BigQuery API chooses the location (typically the `US` multi-region)
internal string? DefaultClientLocation { get; private set; }

internal bool CreateLargeResultsDataset { get; private set; } = true;

public override string AssemblyVersion => BigQueryUtils.BigQueryAssemblyVersion;

public override string AssemblyName => BigQueryUtils.BigQueryAssemblyName;
Expand Down Expand Up @@ -204,6 +207,13 @@ internal BigQueryClient Open(string? projectId = null)
activity?.AddBigQueryParameterTag(BigQueryParameters.ClientTimeout, seconds);
}

if (this.properties.TryGetValue(BigQueryParameters.CreateLargeResultsDataset, out string? sCreateLargeResultDataset) &&
bool.TryParse(sCreateLargeResultDataset, out bool createLargeResultDataset))
{
CreateLargeResultsDataset = createLargeResultDataset;
activity?.AddBigQueryParameterTag(BigQueryParameters.CreateLargeResultsDataset, createLargeResultDataset);
}

SetCredential();

BigQueryClientBuilder bigQueryClientBuilder = new BigQueryClientBuilder()
Expand All @@ -223,7 +233,30 @@ internal BigQueryClient Open(string? projectId = null)
// User does not have permission to query table bigquery-public-data:blockchain_analytics_ethereum_mainnet_us.accounts,
// or perhaps it does not exist.'

bigQueryClientBuilder.DefaultLocation = DefaultClientLocation;
if (DefaultClientLocation!.Equals(BigQueryConstants.AutoDetectLocation, StringComparison.OrdinalIgnoreCase) == true)
{
// get a temporary client to detect the location
Client = bigQueryClientBuilder.Build();

string ensuredProjectId = EnsureProjectIdIsConfigured(activity);
string? detectedLocation = DetectLocation(ensuredProjectId);

if (!string.IsNullOrEmpty(detectedLocation))
{
DefaultClientLocation = detectedLocation;
bigQueryClientBuilder.DefaultLocation = DefaultClientLocation;
activity?.AddBigQueryTag("client.detected_location", detectedLocation);
}
else
{
activity?.AddBigQueryTag("client.detected_location", null);
}
}
else
{
bigQueryClientBuilder.DefaultLocation = DefaultClientLocation;
}

activity?.AddBigQueryParameterTag(BigQueryParameters.DefaultClientLocation, DefaultClientLocation);
}
else
Expand All @@ -243,6 +276,105 @@ internal BigQueryClient Open(string? projectId = null)
});
}

private string? DetectLocation(string projectId)
{
return this.TraceActivity(activity =>
{
ListDatasetsOptions options = new ListDatasetsOptions()
{
IncludeHidden = true
};

Func<Task<PagedEnumerable<DatasetList, BigQueryDataset>?>> func = () => Task.Run(() =>
{
return Client?.ListDatasets(options);
});

PagedEnumerable<DatasetList, BigQueryDataset>? datasets = ExecuteWithRetriesAsync<PagedEnumerable<DatasetList, BigQueryDataset>?>(func, activity).GetAwaiter().GetResult();

if (datasets == null) return null;

// the ListDatasets call doesn't include the Location value so we have to
// grab the details of N number of datasets to determine the most common location
Dictionary<string, int> locationCounts = new Dictionary<string, int>();

foreach (BigQueryDataset ds in datasets.Take(20))
{
try
{
Func<Task<BigQueryDataset?>> getDatasetFunc = () => Task.Run(() =>
{
return Client?.GetDataset(ds.Reference.DatasetId);
});

BigQueryDataset? fullDataset = ExecuteWithRetriesAsync<BigQueryDataset?>(getDatasetFunc, activity).GetAwaiter().GetResult();// Client?.GetDataset(ds.Reference.DatasetId);
string? location = fullDataset?.Resource.Location;
if (!string.IsNullOrEmpty(location))
{
if (locationCounts.TryGetValue(location!, out int currentCount))
{
locationCounts[location!] = currentCount + 1;
}
else
{
locationCounts[location!] = 1;
}
}
}
catch (Exception ex)
{
activity?.AddException(ex);
}
}

// Return the most common location
return locationCounts
.OrderByDescending(kvp => kvp.Value)
.Select(kvp => kvp.Key)
.FirstOrDefault();
});
}

internal string EnsureProjectIdIsConfigured(Activity? activity)
{
string? projectId = Client?.ProjectId;

if (Client?.ProjectId == BigQueryConstants.DetectProjectId)
{
activity?.AddBigQueryTag("client_project_id", BigQueryConstants.DetectProjectId);

// An error occurs when calling CreateQueryJob without the ID set,
// so use the first one that is found. This does not prevent from calling
// to other 'project IDs' (catalogs) with a query.
Func<Task<PagedEnumerable<ProjectList, CloudProject>?>> func = () => Task.Run(() =>
{
return Client?.ListProjects();
});

PagedEnumerable<ProjectList, CloudProject>? projects = ExecuteWithRetriesAsync<PagedEnumerable<ProjectList, CloudProject>?>(func, activity).GetAwaiter().GetResult();

if (projects != null)
{
string? firstProjectId = projects.Select(x => x.ProjectId).FirstOrDefault();

if (firstProjectId != null)
{
projectId = firstProjectId;
activity?.AddBigQueryTag("detected_client_project_id", firstProjectId);
// need to reopen the Client with the projectId specified
this.Open(firstProjectId);
}
}
}

if (string.IsNullOrEmpty(projectId))
{
throw new ArgumentException("A valid project ID could not be determined. Please specify a project ID using the adbc.bigquery.project_id parameter.");
}

return projectId!;
}

internal void SetCredential()
{
this.TraceActivity(activity =>
Expand Down Expand Up @@ -1314,7 +1446,8 @@ private Dictionary<string, string> ParseOptions()
BigQueryParameters.MaxFetchConcurrency,
BigQueryParameters.StatementType,
BigQueryParameters.StatementIndex,
BigQueryParameters.EvaluationKind
BigQueryParameters.EvaluationKind,
BigQueryParameters.IsMetadataCommand
};

foreach (string key in statementOptions)
Expand Down
24 changes: 19 additions & 5 deletions csharp/src/Drivers/BigQuery/BigQueryParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,27 @@ internal class BigQueryParameters
public const string StatementType = "adbc.bigquery.multiple_statement.statement_type";
public const string UseLegacySQL = "adbc.bigquery.use_legacy_sql";

/// <summary>
/// Indicates whether the driver should create the dataset specified in the
/// <see cref="LargeResultsDataset"/> parameter if it does not already exist.
/// </summary>
public const string CreateLargeResultsDataset = "adbc.bigquery.create_large_results_dataset";

/// <summary>
/// The indicator of whether the <c>AdbcStatement.ExecuteQuery[Async]</c> should execute a metadata command query.
/// In the case this indicator is set to <c>True</c>, the method will execute a metadata command using the native API where
/// the name of the command is given in the <c>AdbcStatement.SqlQuery</c> property value.
/// </summary>
public const string IsMetadataCommand = "adbc.bigquery.statement.is_metadata_command";

// these values are safe to log any time
private static HashSet<string> safeToLog = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
AllowLargeResults, AuthenticationType, BillingProjectId, ClientId, ClientTimeout, DefaultClientLocation, EvaluationKind, GetQueryResultsOptionsTimeout,
EvaluationKind, GetQueryResultsOptionsTimeout, IncludeConstraintsWithGetObjects,
IncludePublicProjectId, LargeDecimalsAsString, LargeResultsDataset, LargeResultsDestinationTable,
IncludePublicProjectId, LargeDecimalsAsString, CreateLargeResultsDataset, LargeResultsDataset, LargeResultsDestinationTable,
MaxFetchConcurrency, MaximumRetryAttempts, ProjectId, RetryDelayMs, StatementIndex,
StatementType, UseLegacySQL
StatementType, UseLegacySQL, IsMetadataCommand, DefaultClientLocation
};

public static bool IsSafeToLog(string name)
Expand Down Expand Up @@ -92,14 +105,15 @@ internal class BigQueryConstants
// default value per https://pkg.go.dev/cloud.google.com/go/bigquery#section-readme
public const string DetectProjectId = "*detect-project-id*";

// if a location is not specified, use this value to auto-detect based on the most commonly
// found location for the datasets in the project
public const string AutoDetectLocation = "*detect-location*";

// matches the pattern for odbc, but for adbc
public const string DefaultLargeDatasetId = "_bqadbc_temp_tables";

public const string PublicProjectId = "bigquery-public-data";

// this is what the BigQuery API uses as the default location
public const string DefaultClientLocation = "US";

// from https://cloud.google.com/bigquery/docs/locations#locations_and_regions as of Sept 28, 2025
public static IReadOnlyList<string> ValidLocations = new List<string>()
{
Expand Down
Loading
Loading