diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index 144e5c415d..69f57f311f 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -17,14 +17,19 @@ import datetime import glob + +# Version detection and compatibility layer for google-cloud-storage v2/v3 +from importlib.metadata import version as get_version import logging import os import pathlib import tempfile from typing import Optional, TYPE_CHECKING +import warnings from google.auth import credentials as auth_credentials from google.cloud import storage +from packaging.version import Version from google.cloud.aiplatform import initializer from google.cloud.aiplatform.utils import resource_manager_utils @@ -35,6 +40,66 @@ _logger = logging.getLogger(__name__) +# Detect google-cloud-storage version once at module load +try: + _GCS_VERSION = Version(get_version("google-cloud-storage")) +except Exception: + # Fallback if version detection fails (should not happen in normal use) + _GCS_VERSION = Version("2.0.0") + +_USE_FROM_URI = _GCS_VERSION >= Version("3.0.0") + +# Warn users on v2 about upcoming deprecation +if _GCS_VERSION < Version("3.0.0"): + warnings.warn( + "Support for google-cloud-storage < 3.0.0 will be removed in a future" + " version of google-cloud-aiplatform. Please upgrade to" + " google-cloud-storage >= 3.0.0.", + FutureWarning, + stacklevel=2, + ) + + +def blob_from_uri(uri: str, client: storage.Client) -> storage.Blob: + """Create a Blob from a GCS URI, compatible with v2 and v3. + + This function provides compatibility across google-cloud-storage versions: + - v3.x: Uses Blob.from_uri() + - v2.x: Uses Blob.from_string() (deprecated in v3) + + Args: + uri: GCS URI (e.g., 'gs://bucket/path/to/blob') + client: Storage client instance + + Returns: + storage.Blob: Blob instance + """ + if _USE_FROM_URI: + return storage.Blob.from_uri(uri, client=client) + else: + return storage.Blob.from_string(uri, client=client) + + +def bucket_from_uri(uri: str, client: storage.Client) -> storage.Bucket: + """Create a Bucket from a GCS URI, compatible with v2 and v3. + + This function provides compatibility across google-cloud-storage versions: + - v3.x: Uses Bucket.from_uri() + - v2.x: Uses Bucket.from_string() (deprecated in v3) + + Args: + uri: GCS bucket URI (e.g., 'gs://bucket-name') + client: Storage client instance + + Returns: + storage.Bucket: Bucket instance + """ + if _USE_FROM_URI: + return storage.Bucket.from_uri(uri, client=client) + else: + return storage.Bucket.from_string(uri, client=client) + + def upload_to_gcs( source_path: str, destination_uri: str, @@ -79,18 +144,18 @@ def upload_to_gcs( destination_file_uri = ( destination_uri.rstrip("/") + "/" + source_file_relative_posix_path ) - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') - destination_blob = storage.Blob.from_string( + _logger.debug( + 'Uploading "%s" to "%s"', source_file_path, destination_file_uri + ) + destination_blob = blob_from_uri( destination_file_uri, client=storage_client ) destination_blob.upload_from_filename(filename=source_file_path) else: source_file_path = source_path destination_file_uri = destination_uri - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') - destination_blob = storage.Blob.from_string( - destination_file_uri, client=storage_client - ) + _logger.debug('Uploading "%s" to "%s"', source_file_path, destination_file_uri) + destination_blob = blob_from_uri(destination_file_uri, client=storage_client) destination_blob.upload_from_filename(filename=source_file_path) @@ -234,7 +299,7 @@ def create_gcs_bucket_for_pipeline_artifacts_if_it_does_not_exist( credentials=credentials, ) - pipelines_bucket = storage.Bucket.from_string( + pipelines_bucket = bucket_from_uri( uri=output_artifacts_gcs_dir, client=storage_client, ) @@ -294,9 +359,9 @@ def download_file_from_gcs( credentials = credentials or initializer.global_config.credentials storage_client = storage.Client(project=project, credentials=credentials) - source_blob = storage.Blob.from_string(source_file_uri, client=storage_client) + source_blob = blob_from_uri(source_file_uri, client=storage_client) - _logger.debug(f'Downloading "{source_file_uri}" to "{destination_file_path}"') + _logger.debug('Downloading "%s" to "%s"', source_file_uri, destination_file_path) source_blob.download_to_filename(filename=destination_file_path) @@ -354,13 +419,10 @@ def _upload_pandas_df_to_gcs( """Uploads the provided Pandas DataFrame to a GCS bucket. Args: - df (pandas.DataFrame): - Required. The Pandas DataFrame to upload. - upload_gcs_path (str): - Required. The GCS path to upload the data file. - file_format (str): - Required. The format to export the DataFrame to. Currently - only JSONL is supported. + df (pandas.DataFrame): Required. The Pandas DataFrame to upload. + upload_gcs_path (str): Required. The GCS path to upload the data file. + file_format (str): Required. The format to export the DataFrame to. + Currently only JSONL is supported. Raises: ValueError: When a file format other than JSONL is provided. @@ -378,9 +440,9 @@ def _upload_pandas_df_to_gcs( project=initializer.global_config.project, credentials=initializer.global_config.credentials, ) - storage.Blob.from_string( - uri=upload_gcs_path, client=storage_client - ).upload_from_filename(filename=local_dataset_path) + blob_from_uri(uri=upload_gcs_path, client=storage_client).upload_from_filename( + filename=local_dataset_path + ) def validate_gcs_path(gcs_path: str) -> None: diff --git a/google/cloud/aiplatform/utils/yaml_utils.py b/google/cloud/aiplatform/utils/yaml_utils.py index 4c660957a1..fbda7d1080 100644 --- a/google/cloud/aiplatform/utils/yaml_utils.py +++ b/google/cloud/aiplatform/utils/yaml_utils.py @@ -22,6 +22,7 @@ from google.auth import transport from google.cloud import storage from google.cloud.aiplatform.constants import pipeline as pipeline_constants +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri # Pattern for an Artifact Registry URL. _VALID_AR_URL = pipeline_constants._VALID_AR_URL @@ -98,7 +99,7 @@ def _load_yaml_from_gs_uri( """ yaml = _maybe_import_yaml() storage_client = storage.Client(project=project, credentials=credentials) - blob = storage.Blob.from_string(uri, storage_client) + blob = blob_from_uri(uri, storage_client) return yaml.safe_load(blob.download_as_bytes()) diff --git a/setup.py b/setup.py index 8e5e3e62c8..e95d484ceb 100644 --- a/setup.py +++ b/setup.py @@ -307,8 +307,8 @@ "proto-plus >= 1.22.3, <2.0.0", "protobuf>=3.20.2,<7.0.0,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", "packaging >= 14.3", - "google-cloud-storage >= 1.32.0, < 3.0.0; python_version<'3.13'", - "google-cloud-storage >= 2.10.0, < 3.0.0; python_version>='3.13'", + "google-cloud-storage >= 1.32.0, < 4.0.0; python_version<'3.13'", + "google-cloud-storage >= 2.10.0, < 4.0.0; python_version>='3.13'", "google-cloud-bigquery >= 1.15.0, < 4.0.0, !=3.20.0", "google-cloud-resource-manager >= 1.3.3, < 3.0.0", "shapely < 3.0.0", diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt index bd404dff75..b0d6db065b 100644 --- a/testing/constraints-3.10.txt +++ b/testing/constraints-3.10.txt @@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 mlflow==2.16.0 # Pinned to speed up installation diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index 5689dc0ad1..e3985cbf0c 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support proto-plus protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) pytest-xdist==3.3.1 # Pinned to unbreak unit tests ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index 7403aeafd3..4e251e7962 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -5,7 +5,7 @@ google-api-core==2.21.0 # Tests google-api-core with rest async support google-auth==2.35.0 # Tests google-auth with rest async support proto-plus mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) pytest-xdist==3.3.1 # Pinned to unbreak unit tests ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests diff --git a/testing/constraints-3.13.txt b/testing/constraints-3.13.txt index a1fae03754..e910b2d528 100644 --- a/testing/constraints-3.13.txt +++ b/testing/constraints-3.13.txt @@ -5,7 +5,7 @@ google-api-core==2.21.0 # Tests google-api-core with rest async support google-auth==2.35.0 # Tests google-auth with rest async support proto-plus mock==4.0.2 -google-cloud-storage==2.10.0 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) pytest-xdist==3.3.1 # Pinned to unbreak unit tests ray==2.5.0 # Pinned until 2.9.3 is verified for Ray tests diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index a4382793b4..cbd2bf5690 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -7,7 +7,7 @@ google-auth==2.14.1 # Tests google-auth without rest async support proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 pytest-xdist==3.3.1 # Pinned to unbreak unit tests diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index fe09f7d279..64404afe71 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -6,7 +6,7 @@ google-auth==2.35.0 # Tests google-auth with rest async support proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 pytest-xdist==3.3.1 # Pinned to unbreak unit tests diff --git a/testing/constraints-ray-2.33.0.txt b/testing/constraints-ray-2.33.0.txt index 7a955ba26c..10bc7c96f9 100644 --- a/testing/constraints-ray-2.33.0.txt +++ b/testing/constraints-ray-2.33.0.txt @@ -4,7 +4,7 @@ google-api-core proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 mlflow==1.30.1 # Pinned to speed up installation diff --git a/testing/constraints-ray-2.4.0.txt b/testing/constraints-ray-2.4.0.txt index de876f6dc2..b0bcbc0b5b 100644 --- a/testing/constraints-ray-2.4.0.txt +++ b/testing/constraints-ray-2.4.0.txt @@ -4,7 +4,7 @@ google-api-core proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==20.0 # Increased for compatibility with MLFlow grpcio-testing==1.34.0 mlflow==1.30.1 # Pinned to speed up installation diff --git a/testing/constraints-ray-2.42.0.txt b/testing/constraints-ray-2.42.0.txt index 4b12597466..5844dca74f 100644 --- a/testing/constraints-ray-2.42.0.txt +++ b/testing/constraints-ray-2.42.0.txt @@ -4,7 +4,7 @@ google-api-core proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 mlflow==1.30.1 # Pinned to speed up installation diff --git a/testing/constraints-ray-2.47.1.txt b/testing/constraints-ray-2.47.1.txt index 2a366694a3..2615289db0 100644 --- a/testing/constraints-ray-2.47.1.txt +++ b/testing/constraints-ray-2.47.1.txt @@ -4,7 +4,7 @@ google-api-core proto-plus==1.22.3 protobuf mock==4.0.2 -google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility +google-cloud-storage==3.0.0 # Updated to v3.x, backward compatible with v2.x via wrapper packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673) grpcio-testing==1.34.0 mlflow==1.30.1 # Pinned to speed up installation diff --git a/tests/system/aiplatform/test_experiments.py b/tests/system/aiplatform/test_experiments.py index 9fe0d190a0..6022819450 100644 --- a/tests/system/aiplatform/test_experiments.py +++ b/tests/system/aiplatform/test_experiments.py @@ -29,6 +29,7 @@ ) from tests.system.aiplatform import e2e_base from tests.system.aiplatform import test_model_upload +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri import numpy as np import sklearn @@ -332,7 +333,7 @@ def test_log_execution_and_artifact(self, shared_state): shared_state["resources"].append(model) storage_client = storage.Client(project=e2e_base._PROJECT) - model_blob = storage.Blob.from_string( + model_blob = blob_from_uri( uri=test_model_upload._XGBOOST_MODEL_URI, client=storage_client ) model_path = tempfile.mktemp() + ".my_model.xgb" diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py index d622b28a1c..9b58215204 100644 --- a/tests/system/aiplatform/test_model_upload.py +++ b/tests/system/aiplatform/test_model_upload.py @@ -23,6 +23,7 @@ from google.cloud import storage from tests.system.aiplatform import e2e_base +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri _XGBOOST_MODEL_URI = "gs://cloud-samples-data-us-central1/vertex-ai/google-cloud-aiplatform-ci-artifacts/models/iris_xgboost/model.bst" @@ -42,9 +43,7 @@ def test_upload_and_deploy_xgboost_model(self, shared_state): ) storage_client = storage.Client(project=e2e_base._PROJECT) - model_blob = storage.Blob.from_string( - uri=_XGBOOST_MODEL_URI, client=storage_client - ) + model_blob = blob_from_uri(uri=_XGBOOST_MODEL_URI, client=storage_client) model_path = tempfile.mktemp() + ".my_model.xgb" model_blob.download_to_filename(filename=model_path) @@ -53,9 +52,7 @@ def test_upload_and_deploy_xgboost_model(self, shared_state): ) shared_state["resources"] = [model] - staging_bucket = storage.Blob.from_string( - uri=model.uri, client=storage_client - ).bucket + staging_bucket = blob_from_uri(uri=model.uri, client=storage_client).bucket # Checking that the bucket is auto-generated assert "-vertex-staging-" in staging_bucket.name diff --git a/tests/system/aiplatform/test_model_version_management.py b/tests/system/aiplatform/test_model_version_management.py index e58b06ff98..8b5d6bddb9 100644 --- a/tests/system/aiplatform/test_model_version_management.py +++ b/tests/system/aiplatform/test_model_version_management.py @@ -26,6 +26,7 @@ from tests.system.aiplatform import e2e_base from tests.system.aiplatform import test_model_upload +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri @pytest.mark.usefixtures("tear_down_resources") @@ -42,7 +43,7 @@ def test_upload_deploy_manage_versioned_model(self, shared_state): ) storage_client = storage.Client(project=e2e_base._PROJECT) - model_blob = storage.Blob.from_string( + model_blob = blob_from_uri( uri=test_model_upload._XGBOOST_MODEL_URI, client=storage_client ) model_path = tempfile.mktemp() + ".my_model.xgb" @@ -60,9 +61,7 @@ def test_upload_deploy_manage_versioned_model(self, shared_state): ) shared_state["resources"] = [model] - staging_bucket = storage.Blob.from_string( - uri=model.uri, client=storage_client - ).bucket + staging_bucket = blob_from_uri(uri=model.uri, client=storage_client).bucket # Checking that the bucket is auto-generated assert "-vertex-staging-" in staging_bucket.name diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py index 84722e2d61..74c264d907 100644 --- a/tests/unit/aiplatform/test_language_models.py +++ b/tests/unit/aiplatform/test_language_models.py @@ -1570,9 +1570,13 @@ def mock_load_yaml_and_json(job_spec): @pytest.fixture -def mock_gcs_from_string(): - with mock.patch.object(storage.Blob, "from_string") as mock_from_string: - yield mock_from_string +def mock_gcs_from_source(): + if hasattr(storage.Blob, "from_string"): + with mock.patch.object(storage.Blob, "from_string") as mock_from_source: + yield mock_from_source + else: + with mock.patch.object(storage.Blob, "from_uri") as mock_from_source: + yield mock_from_source @pytest.fixture @@ -2405,7 +2409,7 @@ def test_tune_text_embedding_model( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen_gecko, mock_deploy_tuned_embedding_model, @@ -2522,7 +2526,7 @@ def test_tune_text_generation_model( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2608,7 +2612,7 @@ def test_tune_text_generation_model_ga( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2700,7 +2704,7 @@ def test_tune_text_generation_model_evaluation_with_only_tensorboard( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2752,7 +2756,7 @@ def test_tune_text_generation_model_staging_bucket( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2799,7 +2803,7 @@ def test_tune_chat_model( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2883,7 +2887,7 @@ def test_tune_code_generation_model( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -2931,7 +2935,7 @@ def test_tune_code_chat_model( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen, mock_get_tuned_model, @@ -3050,7 +3054,7 @@ def test_tune_text_generation_model_rlhf( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen_rlhf, mock_get_tuned_model, @@ -3153,7 +3157,7 @@ def test_tune_chat_generation_model_rlhf( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_request_urlopen_rlhf, mock_get_tuned_model, @@ -5190,7 +5194,7 @@ def test_text_generation_model_distill_from( mock_pipeline_bucket_exists, job_spec, mock_load_yaml_and_json, - mock_gcs_from_string, + mock_gcs_from_source, mock_gcs_upload, mock_urllib_request_urlopen, mock_get_tuned_model, diff --git a/tests/unit/architecture/test_vertexai_import.py b/tests/unit/architecture/test_vertexai_import.py index eb0ad2a16d..2170c7fa9e 100644 --- a/tests/unit/architecture/test_vertexai_import.py +++ b/tests/unit/architecture/test_vertexai_import.py @@ -121,6 +121,7 @@ def _test_external_imports(new_modules: list): builtin_modules = { "concurrent", # "email", # Imported by google.cloud.storage + "importlib_metadata", "pickle", # Needed for external tests "packaging", diff --git a/tests/unit/vertexai/conftest.py b/tests/unit/vertexai/conftest.py index 73b6099083..a12ea8a96d 100644 --- a/tests/unit/vertexai/conftest.py +++ b/tests/unit/vertexai/conftest.py @@ -171,6 +171,10 @@ def from_string(cls, destination_file_uri: str, client: Any): destination_file_uri = destination_file_uri.split("/")[-1] return cls(destination_file_uri, client) + @classmethod + def from_uri(cls, destination_file_uri: str, client: Any): + return cls.from_string(destination_file_uri, client) + def upload_from_filename(self, filename: str): shutil.copy(filename, self.destination_file_uri) @@ -208,6 +212,10 @@ def from_string(cls, destination_file_uri: str, client: Any): ) return cls(destination_file_uri, client) + @classmethod + def from_uri(cls, destination_file_uri: str, client: Any): + return cls.from_string(destination_file_uri, client) + def upload_from_filename(self, filename: str): shutil.copy(filename, self.destination_file_uri) diff --git a/tests/unit/vertexai/test_autorater_yaml.py b/tests/unit/vertexai/test_autorater_yaml.py index ba8aadf295..669f8c0851 100644 --- a/tests/unit/vertexai/test_autorater_yaml.py +++ b/tests/unit/vertexai/test_autorater_yaml.py @@ -148,6 +148,7 @@ def google_auth_mock(): def mock_storage_blob(): with mock.patch("google.cloud.storage.Blob") as mock_blob: mock_blob.from_string.return_value = mock_blob + mock_blob.from_uri.return_value = mock_blob yield mock_blob @@ -474,9 +475,9 @@ def test_roundtrip_rubric(self): def test_upload_string_to_gcs(self, mock_storage_blob): """Test upload evaluation results to GCS.""" utils._upload_string_to_gcs(_TEST_GCS_PATH, _TEST_POINTWISE_YAML) - mock_storage_blob.from_string.assert_called_once_with( - uri=_TEST_GCS_PATH, client=mock.ANY - ) + string_calls = mock_storage_blob.from_string.call_count # gcs < 3.0.0 + uri_calls = mock_storage_blob.from_uri.call_count # gcs >= 3.0.0 + assert uri_calls + string_calls == 1 mock_storage_blob.upload_from_string.assert_called_once() data = mock_storage_blob.upload_from_string.call_args.args[0] assert data == _TEST_POINTWISE_YAML diff --git a/tests/unit/vertexai/test_evaluation.py b/tests/unit/vertexai/test_evaluation.py index 747285984a..48fc5683e3 100644 --- a/tests/unit/vertexai/test_evaluation.py +++ b/tests/unit/vertexai/test_evaluation.py @@ -81,6 +81,7 @@ from vertexai.preview.evaluation.metrics import ( rubric_based_metric, ) +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri import numpy as np import pandas as pd import pytest @@ -798,8 +799,16 @@ def mock_experiment_tracker(): @pytest.fixture def mock_storage_blob_from_string(): - with mock.patch("google.cloud.storage.Blob.from_string") as mock_blob_from_string: - yield mock_blob_from_string + if hasattr(blob_from_uri.__globals__["storage"].Blob, "from_uri"): + with mock.patch.object( + blob_from_uri.__globals__["storage"].Blob, "from_uri" + ) as mock_blob_from_uri: + yield mock_blob_from_uri + else: + with mock.patch.object( + blob_from_uri.__globals__["storage"].Blob, "from_string" + ) as mock_blob_from_string: + yield mock_blob_from_string @pytest.mark.usefixtures("google_auth_mock") @@ -2557,11 +2566,11 @@ def test_upload_results(self, mock_storage_blob_from_string): ) mock_storage_blob_from_string.assert_any_call( - uri="gs://test-bucket/test-file-name/test-file-name.csv", + "gs://test-bucket/test-file-name/test-file-name.csv", client=mock.ANY, ) mock_storage_blob_from_string.assert_any_call( - uri="gs://test-bucket/test-file-name/summary_metrics.json", + "gs://test-bucket/test-file-name/summary_metrics.json", client=mock.ANY, ) mock_json_dump.assert_called_once_with( @@ -2611,7 +2620,7 @@ def test_upload_results_with_default_output_file_name( ) _ = test_eval_task.evaluate() mock_storage_blob_from_string.assert_any_call( - uri="gs://test-bucket/eval_results_2025-02-10-12-00-00-12345/summary_metrics.json", + "gs://test-bucket/eval_results_2025-02-10-12-00-00-12345/summary_metrics.json", client=mock.ANY, ) diff --git a/vertexai/_genai/_gcs_utils.py b/vertexai/_genai/_gcs_utils.py index 021d9d0051..176f2109e4 100644 --- a/vertexai/_genai/_gcs_utils.py +++ b/vertexai/_genai/_gcs_utils.py @@ -19,6 +19,7 @@ from typing import Any, Union from google.cloud import storage # type: ignore[attr-defined] +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri from google.genai._api_client import BaseApiClient import pandas as pd import uuid @@ -55,7 +56,7 @@ def parse_gcs_path(self, gcs_path: str) -> tuple[str, str]: def upload_file_to_gcs(self, upload_gcs_path: str, filename: str) -> None: """Uploads the provided file to a Google Cloud Storage location.""" - storage.Blob.from_string( + blob_from_uri( uri=upload_gcs_path, client=self.storage_client ).upload_from_filename(filename) diff --git a/vertexai/evaluation/utils.py b/vertexai/evaluation/utils.py index 4dd967fdf9..05148d1623 100644 --- a/vertexai/evaluation/utils.py +++ b/vertexai/evaluation/utils.py @@ -39,6 +39,7 @@ from vertexai.evaluation.metrics import ( metric_prompt_template as metric_prompt_template_base, ) +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri if TYPE_CHECKING: @@ -328,9 +329,9 @@ def _upload_file_to_gcs(upload_gcs_path: str, filename: str) -> None: project=initializer.global_config.project, credentials=initializer.global_config.credentials, ) - storage.Blob.from_string( - uri=upload_gcs_path, client=storage_client - ).upload_from_filename(filename) + blob_from_uri(uri=upload_gcs_path, client=storage_client).upload_from_filename( + filename + ) def upload_evaluation_results( diff --git a/vertexai/language_models/_evaluatable_language_models.py b/vertexai/language_models/_evaluatable_language_models.py index 8c8e71f677..d6e8c03db7 100644 --- a/vertexai/language_models/_evaluatable_language_models.py +++ b/vertexai/language_models/_evaluatable_language_models.py @@ -33,6 +33,7 @@ from google.cloud.aiplatform.compat.types import ( pipeline_state as gca_pipeline_state, ) +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri try: import pandas @@ -496,9 +497,7 @@ def _get_metrics_from_gcs_uri( credentials=aiplatform_initializer.global_config.credentials ) - metrics_json = storage.Blob.from_string( - uri=gcs_uri, client=storage_client - ).download_as_text() + metrics_json = blob_from_uri(uri=gcs_uri, client=storage_client).download_as_text() # Sliced classification metrics case, format data if "slicedMetrics" in metrics_json: diff --git a/vertexai/preview/evaluation/utils.py b/vertexai/preview/evaluation/utils.py index cdce8ad250..0e6b2ba087 100644 --- a/vertexai/preview/evaluation/utils.py +++ b/vertexai/preview/evaluation/utils.py @@ -52,6 +52,7 @@ from vertexai.evaluation.metrics import ( metric_prompt_template as metric_prompt_template_base, ) +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri if TYPE_CHECKING: @@ -321,9 +322,9 @@ def _upload_file_to_gcs(upload_gcs_path: str, filename: str) -> None: project=initializer.global_config.project, credentials=initializer.global_config.credentials, ) - storage.Blob.from_string( - uri=upload_gcs_path, client=storage_client - ).upload_from_filename(filename) + blob_from_uri(uri=upload_gcs_path, client=storage_client).upload_from_filename( + filename + ) def _upload_string_to_gcs(upload_gcs_path: str, contents: str) -> None: @@ -332,9 +333,9 @@ def _upload_string_to_gcs(upload_gcs_path: str, contents: str) -> None: project=initializer.global_config.project, credentials=initializer.global_config.credentials, ) - storage.Blob.from_string( - uri=upload_gcs_path, client=storage_client - ).upload_from_string(contents) + blob_from_uri(uri=upload_gcs_path, client=storage_client).upload_from_string( + contents + ) def _upload_pandas_df_to_gcs( @@ -365,9 +366,9 @@ def _upload_pandas_df_to_gcs( project=initializer.global_config.project, credentials=initializer.global_config.credentials, ) - storage.Blob.from_string( - uri=upload_gcs_path, client=storage_client - ).upload_from_filename(filename=local_dataset_path) + blob_from_uri(uri=upload_gcs_path, client=storage_client).upload_from_filename( + filename=local_dataset_path + ) def _upload_evaluation_summary_to_gcs( diff --git a/vertexai/vision_models/_vision_models.py b/vertexai/vision_models/_vision_models.py index ded12d5408..0cc9c77997 100644 --- a/vertexai/vision_models/_vision_models.py +++ b/vertexai/vision_models/_vision_models.py @@ -30,6 +30,7 @@ from google.cloud.aiplatform import initializer as aiplatform_initializer from vertexai._model_garden import _model_garden_models from vertexai._utils import warning_logs +from google.cloud.aiplatform.utils.gcs_utils import blob_from_uri # pylint: disable=g-import-not-at-top try: @@ -193,7 +194,7 @@ def _blob(self) -> storage.Blob: storage_client = storage.Client( credentials=aiplatform_initializer.global_config.credentials ) - blob = storage.Blob.from_string(uri=self._gcs_uri, client=storage_client) + blob = blob_from_uri(uri=self._gcs_uri, client=storage_client) # Needed to populate `blob.content_type` blob.reload() return blob @@ -538,7 +539,7 @@ def _blob(self) -> storage.Blob: storage_client = storage.Client( credentials=aiplatform_initializer.global_config.credentials ) - blob = storage.Blob.from_string(uri=self._gcs_uri, client=storage_client) + blob = blob_from_uri(uri=self._gcs_uri, client=storage_client) # Needed to populate `blob.content_type` blob.reload() return blob