From f47377e4a642cc61895e436739b67d0a0c643f74 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Sat, 21 Jun 2025 09:01:47 -0700 Subject: [PATCH 01/26] Fix import --- cdk/lambda/lambda_function.py | 127 ++++++++-------------------------- 1 file changed, 29 insertions(+), 98 deletions(-) diff --git a/cdk/lambda/lambda_function.py b/cdk/lambda/lambda_function.py index 44333a4..2d73801 100644 --- a/cdk/lambda/lambda_function.py +++ b/cdk/lambda/lambda_function.py @@ -1,4 +1,5 @@ import earthaccess +from earthaccess import DataGranule import json import icechunk import boto3 @@ -15,7 +16,7 @@ store_name = "MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2" drop_vars = ["dt_1km_data", "sst_anomaly"] collection_short_name = "MUR-JPL-L4-GLOB-v4.1" - + # 馃嵄 there is a lot of overlap between this and lithops code and icechunk-nasa code 馃 def open_icechunk_repo(bucket_name: str, store_name: str, ea_creds: Optional[dict] = None): storage = icechunk.s3_storage( @@ -50,11 +51,7 @@ def get_last_timestep(session: icechunk.Session): dt_array = np.array([epoch + timedelta(seconds=int(t)) for t in zarr_store['time'][:]]) return dt_array[-1] -def write_to_icechunk(repo: icechunk.Repository, start_date: str, end_date: str, granule_ur: str): - print("searching for granules") - granule_results = earthaccess.search_data( - temporal=(start_date, end_date), short_name=collection_short_name - ) +def write_to_icechunk(repo: icechunk.Repository, granule_results: list[DataGranule], start_date: str, end_date: str): print("opening virtual dataset") vds = earthaccess.open_virtual_mfdataset( granule_results, @@ -69,12 +66,16 @@ def write_to_icechunk(repo: icechunk.Repository, start_date: str, end_date: str, # write to the icechunk store vds = vds.drop_vars(drop_vars, errors="ignore") print("writing to icechunk") - session = repo.writable_session(branch="main") - vds.virtualize.to_icechunk(session.store, append_dim='time') - print("committing") - return session.commit(f"Committed data for {start_date} to {end_date} using {granule_ur}") + commit_message = f"Committed data for {start_date} to {end_date}." + if os.environ.get("DRY_RUN", "false") == "true": + print(f"Dry run, skipping write to icechunk: {commit_message}") + return commit_message + else: + session = repo.writable_session(branch="main") + vds.virtualize.to_icechunk(session.store, append_dim='time') + return session.commit(commit_message) -def write_to_icechunk_or_fail(granule_cmr_url: str): +def write_to_icechunk_or_fail(): print("logging in") earthaccess.login() print("getting s3 credentials") @@ -84,26 +85,19 @@ def write_to_icechunk_or_fail(granule_cmr_url: str): repo = open_icechunk_repo(bucket, store_name, ea_creds) print("getting last timestep") session = repo.readonly_session(branch="main") - last_timestep = get_last_timestep(session) - print("getting granule data") - granule_data = requests.get(granule_cmr_url).json() - # the beginning and ending datetime have a time of 21:00:00 (e.g. 2024-09-02T21:00:00.000Z to 2024-09-03T21:00:00.000Z) but when you open the data the datetime with a time of 09:00 hours on the same date as the EndingDateTime. which corresponds to the filename. So I think it is appropriate to normalize the search to 09:00 on the date of the EndingDateTime. 
- granule_end_date_str = granule_data['TemporalExtent']['RangeDateTime']['EndingDateTime'] - granule_end_date = datetime.date(datetime.strptime(granule_end_date_str, '%Y-%m-%dT%H:%M:%S.%fZ')) - # check if the granule is at leastone day greater than the last timestep - one_day_later = last_timestep.date() + timedelta(days=1) - if granule_end_date >= one_day_later: - # write to the icechunk store - return write_to_icechunk( - repo, - str(one_day_later) + " 09:00:00", - str(granule_end_date) + " 09:00:00", - granule_data['GranuleUR'] - ) - else: - # fail - print(f"Granule {granule_cmr_url} end date {granule_end_date} is not greater than the last timestep {last_timestep}") + last_timestep = str(get_last_timestep(session)) + " 09:00:00" + print("Searching for granules") + current_date = str(datetime.now().date()) + " 09:00:00" + # In CMR, granules have a beginning and ending datetime have a time of 21:00:00 (e.g. 2024-09-02T21:00:00.000Z to 2024-09-03T21:00:00.000Z) but when you open the data the datetime with a time of 09:00 hours on the same date as the EndingDateTime. which corresponds to the filename. So I think it is appropriate to normalize the search to 09:00 on the date of the EndingDateTime. + granule_results = earthaccess.search_data( + temporal=(last_timestep, current_date), short_name=collection_short_name + ) + if len(granule_results) == 0: + print("No granules found") return None + else: + # write to the icechunk store + return write_to_icechunk(repo, granule_results) def get_secret(): secret_name = os.environ['SECRET_ARN'] @@ -140,72 +134,9 @@ def lambda_handler(event, context: dict = {}): s3 = boto3.client('s3') bucket_name = os.environ['S3_BUCKET_NAME'] - try: - # Process each record in the event - for record in event['Records']: - # Extract message body - message_body = json.loads(record['body']) - print(f"Processing message: {json.dumps(message_body)}") - - # Extract relevant information - # example message body: - # { - # "Type" : "Notification", - # "MessageId" : "fbdb230e-befe-56a6-99ff-7cbe3f7e74ec", - # "TopicArn" : "", - # "Subject" : "Update Notification", - # "Message" : "{\"concept-id\": \"G1200463969-CMR_ONLY\", \"granule-ur\": \"SWOT_L2_HR_PIXC_578_020_221R_20230710T223456_20230710T223506_PIA1_01\", \"producer-granule-id\": \"SWOT_L2_HR_PIXC_578_020_221R_20230710T223456_20230710T223506_PIA1_01.nc\", \"location\": \"https://cmr.earthdata.nasa.gov/search/concepts/G1200463969-CMR_ONLY/16\"}", - # "Timestamp" : "2024-11-14T22:52:48.010Z", - # "SignatureVersion" : "1", - # "Signature" : "VElbKqyRuWNDgI/GB...rjTP+yhjyzdWLomsGA==", - # "SigningCertURL" : "https://sns..amazonaws.com/SimpleNotificationService-9c6465fa...1136.pem", - # "UnsubscribeURL" : "https://sns..amazonaws.com/?Action=Unsubscribe&SubscriptionArn=", - # "MessageAttributes" : { - # "collection-concept-id" : {"Type":"String","Value":"C1200463968-CMR_ONLY"}, - # "mode" : {"Type":"String","Value":"Update"} - # } - # } - message = json.loads(message_body.get('Message')) - - # Create a timestamp for the filename - timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') - - # Create S3 key for storing the message - s3_key = f"cmr-notifications/{message_body.get('MessageId')}_{timestamp}.json" - - # Store the message in S3 - s3.put_object( - Bucket=bucket_name, - Key=s3_key, - Body=json.dumps(message_body), - ContentType='application/json' - ) + write_to_icechunk_or_fail() - print(f"Stored message in S3: {s3_key}") - - # next I would want to check if the date is the next datetime for the collection - # example 
mur sst granule URL: https://cmr.earthdata.nasa.gov/search/concepts/G3507162174-POCLOUD - try: - granule_cmr_url = message.get('location') - write_to_icechunk_or_fail(granule_cmr_url) - except Exception as e: - print(f"Error writing to icechunk: {e}") - s3_key = f"cmr-notifications/errors/{message_body.get('MessageId')}_{timestamp}.json" - # write error to s3 - s3.put_object( - Bucket=bucket_name, - Key=s3_key, - Body=json.dumps(e), - ContentType='application/json' - ) - raise - - - return { - 'statusCode': 200, - 'body': json.dumps('Successfully processed messages') - } - - except Exception as e: - print(f"Error processing messages: {str(e)}") - raise \ No newline at end of file + return { + 'statusCode': 200, + 'body': json.dumps('Successfully processed messages') + } \ No newline at end of file From 872fb378598a146f581185ac91a2eaab39d9a2ab Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Sat, 21 Jun 2025 17:00:28 +0000 Subject: [PATCH 02/26] Fix some dates and calling write_to_icechunk --- cdk/lambda/lambda_function.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cdk/lambda/lambda_function.py b/cdk/lambda/lambda_function.py index 2d73801..934b238 100644 --- a/cdk/lambda/lambda_function.py +++ b/cdk/lambda/lambda_function.py @@ -76,17 +76,17 @@ def write_to_icechunk(repo: icechunk.Repository, granule_results: list[DataGranu return session.commit(commit_message) def write_to_icechunk_or_fail(): - print("logging in") + print("earthaccess.login()") earthaccess.login() - print("getting s3 credentials") + print("earthaccess.get_s3_credentials") ea_creds = earthaccess.get_s3_credentials(daac='PODAAC') print("opening icechunk repo") # check date is next datetime for the icechunk store or fail repo = open_icechunk_repo(bucket, store_name, ea_creds) - print("getting last timestep") session = repo.readonly_session(branch="main") last_timestep = str(get_last_timestep(session)) + " 09:00:00" - print("Searching for granules") + print(f"Last timestep in icechunk store: {last_timestep}") + print("Searching for granules...") current_date = str(datetime.now().date()) + " 09:00:00" # In CMR, granules have a beginning and ending datetime have a time of 21:00:00 (e.g. 2024-09-02T21:00:00.000Z to 2024-09-03T21:00:00.000Z) but when you open the data the datetime with a time of 09:00 hours on the same date as the EndingDateTime. which corresponds to the filename. So I think it is appropriate to normalize the search to 09:00 on the date of the EndingDateTime. 
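     # For example (values taken from the granule naming convention described above): granule
     # 20240903090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1 is listed in CMR with the range
     # 2024-09-02T21:00:00.000Z to 2024-09-03T21:00:00.000Z, but opens as the single timestep
     # 2024-09-03 09:00:00, matching the date in the filename.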
granule_results = earthaccess.search_data( @@ -96,8 +96,9 @@ def write_to_icechunk_or_fail(): print("No granules found") return None else: + print(f"Number of granules found: {len(granule_results)}") # write to the icechunk store - return write_to_icechunk(repo, granule_results) + return write_to_icechunk(repo, granule_results, start_date=last_timestep, end_date=current_date) def get_secret(): secret_name = os.environ['SECRET_ARN'] From 2983fefd207ca9dfab4a4356e928b167766c8b02 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Sat, 21 Jun 2025 17:01:36 +0000 Subject: [PATCH 03/26] Add testing notebook --- cdk/lambda/testing.ipynb | 196 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 cdk/lambda/testing.ipynb diff --git a/cdk/lambda/testing.ipynb b/cdk/lambda/testing.ipynb new file mode 100644 index 0000000..dc5a3b3 --- /dev/null +++ b/cdk/lambda/testing.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d6252a84-b3bd-4979-a173-cba91056f67e", + "metadata": {}, + "source": [ + "# Testing the lambda handler" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "edcdd905-0700-4c3b-9da0-fa9e9e652b4e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from lambda_function import lambda_handler, open_icechunk_repo\n", + "\n", + "import earthaccess\n", + "import os\n", + "import xarray as xr\n", + "import zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "416d2208-52de-4e56-a515-b0ea165ce1d5", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", + "os.environ['S3_BUCKET_NAME'] = 'mursst-cmr-notifications'" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", + "metadata": {}, + "outputs": [], + "source": [ + "#lambda_handler({})" + ] + }, + { + "cell_type": "markdown", + "id": "7b0b213b-bfda-42e3-85ac-1b5250e89b6e", + "metadata": {}, + "source": [ + "# Testing the store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f", + "metadata": {}, + "outputs": [], + "source": [ + "earthaccess.login()\n", + "ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')\n", + "bucket = 'nasa-eodc-public'\n", + "store_name = \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "605fc2c3-378c-4986-84b3-6fe2020c00dd", + "metadata": {}, + "outputs": [], + "source": [ + "repo = open_icechunk_repo(bucket, store_name, ea_creds)\n", + "session = repo.readonly_session(branch=\"main\")\n", + "xds = xr.open_zarr(session.store, zarr_version=3, chunks={}, consolidated=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "c476af97-621c-4c52-a032-0a131573d1d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(284.17548539)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xds.sel(lat=slice(47.6, 47.7), lon=slice(-122.4, -122.3)).analysed_sst.mean().values" + ] + }, + { + "cell_type": "markdown", + "id": "0cb434fa-f083-4b8f-8604-08bd2f0a66de", + "metadata": {}, + "source": [ + "# Deleting 
data" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "69543cdd-a254-45dd-bbb5-1a5c090c5cd8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:145: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n" + ] + } + ], + "source": [ + "session = repo.writable_session(branch=\"main\")\n", + "store = session.store\n", + "variables = [\"analysed_sst\", \"analysis_error\", \"sea_ice_fraction\", \"mask\", \"time\"]\n", + "resize = 364\n", + "for var in variables:\n", + " # Open your array\n", + " group = zarr.open_group(store)\n", + " array = group[var]\n", + " \n", + " # Truncate the array to remove time indices > 364\n", + " # This effectively removes chunks for indices 365, 366, etc.\n", + " new_shape = list(array.shape)\n", + " new_shape[0] = 364 # Assuming time is the first dimension\n", + " array.resize(new_shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "38b1b1b9-55f7-4ba8-80f8-ca750fac4631", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'44B9XPA2C0QH6FVT5AF0'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DANGER!\n", + "# session.commit(\"Removed data for time > 2025-05-31\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ea999eb3c7ba8bd0b998208e2def6c3de11f00a8 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Sat, 28 Jun 2025 09:45:35 -0700 Subject: [PATCH 04/26] Switch to scheduled run --- cdk/lambda/lambda_function.py | 23 +++---- cdk/mursst_stack.py | 122 +++++++++++++++++----------------- 2 files changed, 70 insertions(+), 75 deletions(-) diff --git a/cdk/lambda/lambda_function.py b/cdk/lambda/lambda_function.py index 2d73801..5f972ac 100644 --- a/cdk/lambda/lambda_function.py +++ b/cdk/lambda/lambda_function.py @@ -43,7 +43,7 @@ def open_icechunk_repo(bucket_name: str, store_name: str, ea_creds: Optional[dic repo_config['virtual_chunk_credentials'] = earthdata_credentials return icechunk.Repository.open(**repo_config) -def get_last_timestep(session: icechunk.Session): +def get_last_timestep(session: icechunk.Session) -> datetime: # get the last timestep from the icechunk store # return the last timestep zarr_store = zarr.open(session.store, mode="r") @@ -85,10 +85,12 @@ def write_to_icechunk_or_fail(): repo = open_icechunk_repo(bucket, store_name, ea_creds) print("getting last timestep") session = repo.readonly_session(branch="main") - last_timestep = str(get_last_timestep(session)) + " 09:00:00" + # MUR SST granules have a temporal range of date 1 21:00:00 to date 2 21:00:00, e.g. granule 20240627090000 has datetime range of 2024-06-26 21:00:00:00 to 2024-06-27 21:00:00:00 + # so granules overlap in time. 
+    # Here we increment the latest timestep of the icechunk store by one second to make sure we only get granules outside of the latest date covered by the icechunk store
+    last_timestep = str(get_last_timestep(session)) + " 21:00:01"
     print("Searching for granules")
-    current_date = str(datetime.now().date()) + " 09:00:00"
-    # In CMR, granules have a beginning and ending datetime have a time of 21:00:00 (e.g. 2024-09-02T21:00:00.000Z to 2024-09-03T21:00:00.000Z) but when you open the data the datetime with a time of 09:00 hours on the same date as the EndingDateTime. which corresponds to the filename. So I think it is appropriate to normalize the search to 09:00 on the date of the EndingDateTime.
+    current_date = str(datetime.now().date()) + " 21:00:00"
     granule_results = earthaccess.search_data(
         temporal=(last_timestep, current_date), short_name=collection_short_name
     )
@@ -97,7 +99,7 @@ def write_to_icechunk_or_fail():
         print("No granules found")
         return None
     else:
         # write to the icechunk store
-        return write_to_icechunk(repo, granule_results)
+        return write_to_icechunk(repo, granule_results, start_date=last_timestep, end_date=current_date)
 
 def get_secret():
     secret_name = os.environ['SECRET_ARN']
@@ -121,8 +123,7 @@ def get_secret():
 
 def lambda_handler(event, context: dict = {}):
     """
-    Process messages from SQS queue containing CMR notifications.
-    Each message contains information about new or updated granules.
+    Update the icechunk store with the latest MUR-JPL-L4-GLOB-v4.1 data.
     """
     # Fetch secrets
     secrets = get_secret()
@@ -130,13 +131,9 @@ def lambda_handler(event, context: dict = {}):
     os.environ['EARTHDATA_PASSWORD'] = secrets['EARTHDATA_PASSWORD']
     print(f"Received event: {json.dumps(event)}")
 
-    # Initialize S3 client for storing processed messages
-    s3 = boto3.client('s3')
-    bucket_name = os.environ['S3_BUCKET_NAME']
-
-    write_to_icechunk_or_fail()
+    result = write_to_icechunk_or_fail()
 
     return {
         'statusCode': 200,
-        'body': json.dumps('Successfully processed messages')
+        'body': json.dumps(f'Successfully processed messages: {result}')
     }
\ No newline at end of file
diff --git a/cdk/mursst_stack.py b/cdk/mursst_stack.py
index bf79f3f..f31f868 100644
--- a/cdk/mursst_stack.py
+++ b/cdk/mursst_stack.py
@@ -2,30 +2,21 @@
     Stack,
     aws_lambda as _lambda,
     aws_iam as iam,
-    aws_sqs as sqs,
     aws_s3 as s3,
     aws_sns as sns,
     aws_cloudwatch as cloudwatch,
     aws_cloudwatch_actions as cloudwatch_actions,
+    aws_events as events,
+    aws_events_targets as targets,
     Duration,
     RemovalPolicy,
 )
 from constructs import Construct
 import os
 
-QUEUE_ARN_FILE = '../queue_arn.txt'
 
 class MursstStack(Stack):
     def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
         super().__init__(scope, construct_id, **kwargs)
-
-        # Create S3 bucket for storing CMR notifications
-        bucket = s3.Bucket(
-            self, "MursstCmrNotificationsBucket",
-            bucket_name="mursst-cmr-notifications",
-            removal_policy=RemovalPolicy.DESTROY,
-            auto_delete_objects=True
-        )
-
         # Create or import IAM role for Lambda based on environment variable
         lambda_role = None
         if 'LAMBDA_FUNCTION_ROLE' in os.environ:
@@ -40,14 +31,14 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
             self, "MursstLambdaRole",
             assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
             managed_policies=[
-                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSLambdaSQSQueueExecutionRole"),
+                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSLambdaBasicExecutionRole"),
                 iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess")
             ]
         )
 
         # Create Lambda 
function using the determined role
        lambda_function = _lambda.Function(
-            self, "MursstCmrNotificationProcessor",
+            self, "MursstIcechunkUpdater",
            runtime=_lambda.Runtime.PYTHON_3_12,
            handler="lambda_function.lambda_handler",
            code=_lambda.Code.from_docker_build(
@@ -57,83 +48,90 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
            ),
            role=lambda_role,
            environment={
-                "S3_BUCKET_NAME": bucket.bucket_name,
                "SECRET_ARN": "arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C" # Replace with your secret ARN
            },
            timeout=Duration.seconds(30),
            memory_size=2048
        )
 
-        # Reference existing SQS queue
-        with open(QUEUE_ARN_FILE, 'r') as f:
-            queue_arn = f.read().strip()
-
-        queue = sqs.Queue.from_queue_arn(
-            self, "MursstCmrNotificationQueue",
-            queue_arn=queue_arn
-        )
-
-        # Add SQS permissions to the role
-        lambda_role.add_to_policy(
-            iam.PolicyStatement(
-                actions=[
-                    "sqs:ReceiveMessage",
-                    "sqs:DeleteMessage",
-                    "sqs:GetQueueAttributes"
-                ],
-                resources=[queue.queue_arn]
-            )
-        )
-
-        # Add SQS trigger to Lambda
-        lambda_function.add_event_source_mapping(
-            "MursstSqsTrigger",
-            event_source_arn=queue.queue_arn,
-            batch_size=1
-        )
-
-        # Grant Lambda permissions to write to S3 bucket
-        bucket.grant_write(lambda_function)
-
-        # Grant Lambda permissions to access Secrets Manager
-        lambda_role.add_to_policy(
-            iam.PolicyStatement(
-                actions=[
-                    "secretsmanager:GetSecretValue"
-                ],
-                resources=["arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C"] # Replace with your secret ARN
-            )
-        )
-
        # Create SNS topic for notifications
        notification_topic = sns.Topic(
-            self, "MursstNotificationTopic",
-            topic_name="mursst-lambda-notifications"
+            self, "MursstIcechunkUpdaterNotificationTopic",
+            topic_name="mursst-icechunk-updater-notifications"
        )
 
        # Add email subscription to SNS topic
+        # Note: You'll need to confirm the subscription in your email
        sns.Subscription(
-            self,
-            id="MursstCmrProcessingEmailSubscription",
+            self, "MursstIcechunkUpdaterEmailSubscription",
            topic=notification_topic,
            protocol=sns.SubscriptionProtocol.EMAIL,
            endpoint="aimee@developmentseed.org" # Replace with your email
        )
 
-        # Create CloudWatch alarm for Lambda invocations
+        # Create EventBridge rule to trigger Lambda daily at 14:00 UTC (6am PST, 7am PDT)
+        daily_rule = events.Rule(
+            self, "MursstDailyRule",
+            schedule=events.Schedule.cron(
+                minute="0",
+                hour="14", # 14:00 UTC = 6am PST (UTC-8) or 7am PDT (UTC-7); 6am PDT would be 13:00 UTC
+                day="*",
+                month="*",
+                year="*"
+            ),
+            description="Trigger Mursst Lambda function daily at 14:00 UTC"
+        )
+
+        # Add Lambda as target for the EventBridge rule
+        daily_rule.add_target(targets.LambdaFunction(lambda_function))
+
+        # Create CloudWatch alarm for Lambda errors
+        lambda_error_alarm = cloudwatch.Alarm(
+            self, "MursstLambdaErrorAlarm",
+            metric=lambda_function.metric_errors(),
+            threshold=1,
+            evaluation_periods=1,
+            comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
+            alarm_description="Alarm when Lambda function encounters errors"
+        )
+
+        # Add SNS action to the error alarm
+        lambda_error_alarm.add_alarm_action(
+            cloudwatch_actions.SnsAction(notification_topic)
+        )
+
+        # Create CloudWatch alarm for Lambda invocations (success notifications)
        lambda_invocation_alarm = cloudwatch.Alarm(
            self, "MursstLambdaInvocationAlarm",
            metric=lambda_function.metric_invocations(),
            threshold=1,
            evaluation_periods=1,
            comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
-            
alarm_description="Alarm when Lambda function is invoked" + alarm_description="Alarm when Lambda function is invoked successfully" ) - # Add SNS action to the alarm + # Add SNS action to the invocation alarm lambda_invocation_alarm.add_alarm_action( cloudwatch_actions.SnsAction(notification_topic) ) # Grant Lambda permissions to publish to SNS notification_topic.grant_publish(lambda_function) + + # Add EventBridge permissions to invoke Lambda + lambda_function.add_permission( + "AllowEventBridgeInvoke", + principal=iam.ServicePrincipal("events.amazonaws.com"), + action="lambda:InvokeFunction", + source_arn=daily_rule.rule_arn + ) + + # Grant Lambda permissions to access Secrets Manager + lambda_role.add_to_policy( + iam.PolicyStatement( + actions=[ + "secretsmanager:GetSecretValue" + ], + resources=["arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C"] # Replace with your secret ARN + ) + ) From 34c7d77cc731f80d66800a4ed064fe7e8b511368 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Sat, 28 Jun 2025 09:45:42 -0700 Subject: [PATCH 05/26] Add testing notebook --- cdk/lambda/testing.ipynb | 338 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 cdk/lambda/testing.ipynb diff --git a/cdk/lambda/testing.ipynb b/cdk/lambda/testing.ipynb new file mode 100644 index 0000000..6d3578e --- /dev/null +++ b/cdk/lambda/testing.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d6252a84-b3bd-4979-a173-cba91056f67e", + "metadata": {}, + "source": [ + "# Testing the lambda handler" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "edcdd905-0700-4c3b-9da0-fa9e9e652b4e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from lambda_function import lambda_handler, open_icechunk_repo\n", + "\n", + "import earthaccess\n", + "import os\n", + "import xarray as xr\n", + "import zarr\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\") " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "416d2208-52de-4e56-a515-b0ea165ce1d5", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", + "os.environ['DRY_RUN'] = \"true\"" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", + "metadata": {}, + "outputs": [], + "source": [ + "# lambda_handler({})" + ] + }, + { + "cell_type": "markdown", + "id": "7b0b213b-bfda-42e3-85ac-1b5250e89b6e", + "metadata": {}, + "source": [ + "# Testing the store" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f", + "metadata": {}, + "outputs": [], + "source": [ + "earthaccess.login()\n", + "ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')\n", + "bucket = 'nasa-eodc-public'\n", + "store_name = \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\"\n", + "lat_slice = slice(47.6, 47.7)\n", + "lon_slice = slice(-122.4, -122.3)\n", + "time_range = [\"2024-06-02\", \"2024-06-30\"]" + ] + }, + { + "cell_type": "markdown", + "id": "311a5dee-3d74-49b2-89e7-a7462c3f534a", + "metadata": {}, + "source": [ + "### Get a value from the icechunk store" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "605fc2c3-378c-4986-84b3-6fe2020c00dd", + "metadata": {}, + "outputs": [], + "source": [ + "repo = open_icechunk_repo(bucket, store_name, ea_creds)\n", + "session = repo.readonly_session(branch=\"main\")\n", + "xds = xr.open_zarr(session.store, zarr_version=3, chunks={}, consolidated=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c476af97-621c-4c52-a032-0a131573d1d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(29, 11, 11)\n" + ] + }, + { + "data": { + "text/plain": [ + "array(285.61836207)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subset = xds.sel(lat=lat_slice, lon=lon_slice, time=slice(*time_range))\n", + "print(subset.analysed_sst.shape)\n", + "subset.analysed_sst.mean().values" + ] + }, + { + "cell_type": "markdown", + "id": "4a3e767e-9f0e-4e47-ab68-7389e384e0ee", + "metadata": {}, + "source": [ + "### Get same value from original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b6df651-9d2f-4987-bbce-eb9141974100", + "metadata": {}, + "outputs": [], + "source": [ + "results = earthaccess.search_data(\n", + " short_name='MUR-JPL-L4-GLOB-v4.1',\n", + " temporal=(time_range[0] + \" 09:00:00\", time_range[1] + \" 09:00:00\"),\n", + ")\n", + "\n", + "direct_access_links = [granule.data_links(access=\"direct\")[0] for granule in results]\n", + "\n", + "fileset = earthaccess.open(direct_access_links, provider='POCLOUD')\n", + "\n", + "og_ds = xr.open_mfdataset(fileset)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "4030a24b-e2b1-432b-b32f-ea29b59dd0bf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(29, 11, 11)\n" + ] + }, + { + "data": { + 
"text/plain": [ + "array(285.61836207)" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "og_subset = og_ds.sel(lat=lat_slice, lon=lon_slice, time=slice(*time_range))\n", + "print(og_subset.analysed_sst.shape)\n", + "og_subset.analysed_sst.mean().values" + ] + }, + { + "cell_type": "markdown", + "id": "0cb434fa-f083-4b8f-8604-08bd2f0a66de", + "metadata": {}, + "source": [ + "# Deleting data\n", + "\n", + "You can delete data directly if necessary or remove previous commits.\n", + "\n", + "### Option 1: first option resizes the arrays." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "69543cdd-a254-45dd-bbb5-1a5c090c5cd8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:145: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n" + ] + } + ], + "source": [ + "session = repo.writable_session(branch=\"main\")\n", + "store = session.store\n", + "variables = [\"analysed_sst\", \"analysis_error\", \"sea_ice_fraction\", \"mask\", \"time\"]\n", + "resize = 364\n", + "for var in variables:\n", + " # Open your array\n", + " group = zarr.open_group(store)\n", + " array = group[var]\n", + " \n", + " # Truncate the array to remove time indices > 364\n", + " # This effectively removes chunks for indices 365, 366, etc.\n", + " new_shape = list(array.shape)\n", + " new_shape[0] = 364 # Assuming time is the first dimension\n", + " array.resize(new_shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "38b1b1b9-55f7-4ba8-80f8-ca750fac4631", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'44B9XPA2C0QH6FVT5AF0'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DANGER!\n", + "# session.commit(\"Removed data for time > 2025-05-31\")" + ] + }, + { + "cell_type": "markdown", + "id": "72ccfdcf-7a6b-4b97-aba8-02ade6f48847", + "metadata": {}, + "source": [ + "### Option 2: Reset to a previous commit.\n", + "\n", + "First list commits:" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "7ae9a196-49b5-4f83-9730-19612c3c7124", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Committed data for 2025-05-31 09:00:00 09:00:00 to 2025-06-28 09:00:00.',\n", + " '7PZK6744FDHXHKPYVX8G'),\n", + " ('Removed data for time > 2025-05-31', '44B9XPA2C0QH6FVT5AF0'),\n", + " ('Commit data 2024-10-01 to 2025-06-04', 'GPRDBT2XK9ZERYSQ7EA0'),\n", + " ('Commit data 2024-09-13 to 2024-09-30', 'CP5PHVT9V88VPZTZ0E00'),\n", + " ('Committed data for 2024-09-12 09:00:00 to 2024-09-12 09:00:00 using 20240912090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'W7Z0Y2FAGZ8WFPMJYZTG'),\n", + " ('Committed data for 2024-09-11 09:00:00 to 2024-09-11 09:00:00 using 20240911090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'QRB8HZE1WEK9AA1FWH00'),\n", + " ('Commit data 2024-09-05 to 2024-09-10', '3R6SDVDMWP0SVB6KW0ZG'),\n", + " ('Committed data for 2024-09-04 09:00:00 to 2024-09-04 09:00:00 using 20240904090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'MSX9YMGN8EPP3S2Z8K50'),\n", + " ('Committed data for 2024-09-01 09:00:00 to 2024-09-03 09:00:00 using 20240903090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'SE5QYNGDA0KSQCKTHGQ0'),\n", + " ('Commit data 
2024-08-02 to 2024-08-30', 'K31CHGA0N2FXWCNSJVDG'),\n", + " ('Commit data 2024-07-02 to 2024-07-31', 'Q37QBVDX4A58FPSY9190'),\n", + " ('Commit data 2024-06-02 to 2024-06-30', 'DWCSFZX4TPNA4SEREME0'),\n", + " ('Repository initialized', 'AYXEAVT6QYRBDXC1455G')]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[(ancestor.message, ancestor.id) for ancestor in repo.ancestry(branch=\"main\")]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4214649-11b2-4a35-9e97-bed52a2e0dcd", + "metadata": {}, + "outputs": [], + "source": [ + "# repo.reset_branch(\"main\", \"commit id\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 859ae5445348ec0eedc0bc91e5f52015092a717c Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Thu, 3 Jul 2025 16:13:38 +0000 Subject: [PATCH 06/26] testing --- cdk/lambda/testing.ipynb | 272 +++++++++++++++++++++++++++++++++++---- 1 file changed, 244 insertions(+), 28 deletions(-) diff --git a/cdk/lambda/testing.ipynb b/cdk/lambda/testing.ipynb index dc5a3b3..50b629c 100644 --- a/cdk/lambda/testing.ipynb +++ b/cdk/lambda/testing.ipynb @@ -10,49 +10,40 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 3, "id": "edcdd905-0700-4c3b-9da0-fa9e9e652b4e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", "from lambda_function import lambda_handler, open_icechunk_repo\n", - "\n", "import earthaccess\n", "import os\n", "import xarray as xr\n", - "import zarr" + "import zarr\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\") " ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 4, "id": "416d2208-52de-4e56-a515-b0ea165ce1d5", "metadata": {}, "outputs": [], "source": [ "os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", - "os.environ['S3_BUCKET_NAME'] = 'mursst-cmr-notifications'" + "os.environ['DRY_RUN'] = \"true\"" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 14, "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", "metadata": {}, "outputs": [], "source": [ - "#lambda_handler({})" + "lambda_handler({})" ] }, { @@ -65,20 +56,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter your Earthdata Login username: aimeeb\n", + "Enter your Earthdata password: 路路路路路路路路\n" + ] + } + ], "source": [ "earthaccess.login()\n", "ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')\n", "bucket = 'nasa-eodc-public'\n", - "store_name = \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\"" + "store_name = \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\"\n", + "lat_slice = slice(47.6, 47.7)\n", + "lon_slice = slice(-122.4, -122.3)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "id": "e8f6b892-477d-43be-bf6c-9a720104ad37", + "metadata": {}, + "outputs": [], + "source": [ + "time_range = [\"2025-06-30\", \"2025-07-02\"]" + ] + }, + { + "cell_type": "markdown", + "id": "311a5dee-3d74-49b2-89e7-a7462c3f534a", + "metadata": {}, + "source": [ + "### Get a value from the icechunk store" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "605fc2c3-378c-4986-84b3-6fe2020c00dd", "metadata": {}, "outputs": [], @@ -90,23 +110,147 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 16, + "id": "bfa9d9d6-3f19-4788-bfe2-b1b5efa620c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "slice('2025-06-30', '2025-07-02', None)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "slice(*time_range)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "id": "c476af97-621c-4c52-a032-0a131573d1d4", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 11, 11)\n" + ] + }, + { + "ename": "error", + "evalue": "Error -3 while decompressing data: incorrect header check", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31merror\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m subset \u001b[38;5;241m=\u001b[39m xds\u001b[38;5;241m.\u001b[39msel(lat\u001b[38;5;241m=\u001b[39mlat_slice, lon\u001b[38;5;241m=\u001b[39mlon_slice, time\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2025-07-01\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2\u001b[0m 
print(subset.analysed_sst.shape)\n----> 3 subset.analysed_sst.mean().values",
      "[... roughly 20 further frames elided: xarray lazy indexing -> dask compute -> xarray zarr backend -> zarr get_orthogonal_selection -> zarr.core.codec_pipeline.BatchedCodecPipeline.read ...]",
      "error: Error -3 while decompressing data: incorrect header check"
     ]
    }
   ],
   "source": [
    "subset = xds.sel(lat=lat_slice, lon=lon_slice, time='2025-07-01')\n",
    "print(subset.analysed_sst.shape)\n",
    "subset.analysed_sst.mean().values"
   ]
  },
\u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 464\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 465\u001b[0m [\n\u001b[1;32m 466\u001b[0m (single_batch_info, out, drop_axes)\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m single_batch_info \u001b[38;5;129;01min\u001b[39;00m batched(batch_info, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_size)\n\u001b[1;32m 468\u001b[0m ],\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mread_batch,\n\u001b[1;32m 470\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 471\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:69\u001b[0m, in \u001b[0;36mconcurrent_map\u001b[0;34m(items, func, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n\u001b[0;32m---> 69\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39m[asyncio\u001b[38;5;241m.\u001b[39mensure_future(run(item)) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m items])\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:67\u001b[0m, in \u001b[0;36mconcurrent_map..run\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun\u001b[39m(item: \u001b[38;5;28mtuple\u001b[39m[Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/codec_pipeline.py:270\u001b[0m, in \u001b[0;36mBatchedCodecPipeline.read_batch\u001b[0;34m(self, batch_info, out, drop_axes)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 265\u001b[0m chunk_bytes_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 266\u001b[0m [(byte_getter, array_spec\u001b[38;5;241m.\u001b[39mprototype) \u001b[38;5;28;01mfor\u001b[39;00m byte_getter, array_spec, \u001b[38;5;241m*\u001b[39m_ \u001b[38;5;129;01min\u001b[39;00m batch_info],\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m byte_getter, prototype: byte_getter\u001b[38;5;241m.\u001b[39mget(prototype),\n\u001b[1;32m 268\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 269\u001b[0m )\n\u001b[0;32m--> 270\u001b[0m chunk_array_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecode_batch(\n\u001b[1;32m 271\u001b[0m [\n\u001b[1;32m 272\u001b[0m (chunk_bytes, chunk_spec)\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk_bytes, (_, chunk_spec, \u001b[38;5;241m*\u001b[39m_) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\n\u001b[1;32m 274\u001b[0m 
chunk_bytes_batch, batch_info, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 275\u001b[0m )\n\u001b[1;32m 276\u001b[0m ],\n\u001b[1;32m 277\u001b[0m )\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\n\u001b[1;32m 279\u001b[0m chunk_array_batch, batch_info, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 280\u001b[0m ):\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk_array \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/codec_pipeline.py:185\u001b[0m, in \u001b[0;36mBatchedCodecPipeline.decode_batch\u001b[0;34m(self, chunk_bytes_and_specs)\u001b[0m\n\u001b[1;32m 178\u001b[0m (\n\u001b[1;32m 179\u001b[0m aa_codecs_with_spec,\n\u001b[1;32m 180\u001b[0m ab_codec_with_spec,\n\u001b[1;32m 181\u001b[0m bb_codecs_with_spec,\n\u001b[1;32m 182\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_codecs_with_resolved_metadata_batched(chunk_specs)\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m bb_codec, chunk_spec_batch \u001b[38;5;129;01min\u001b[39;00m bb_codecs_with_spec[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]:\n\u001b[0;32m--> 185\u001b[0m chunk_bytes_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m bb_codec\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28mzip\u001b[39m(chunk_bytes_batch, chunk_spec_batch, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 187\u001b[0m )\n\u001b[1;32m 189\u001b[0m ab_codec, chunk_spec_batch \u001b[38;5;241m=\u001b[39m ab_codec_with_spec\n\u001b[1;32m 190\u001b[0m chunk_array_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m ab_codec\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28mzip\u001b[39m(chunk_bytes_batch, chunk_spec_batch, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 192\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:129\u001b[0m, in \u001b[0;36mBaseCodec.decode\u001b[0;34m(self, chunks_and_specs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 115\u001b[0m chunks_and_specs: Iterable[\u001b[38;5;28mtuple\u001b[39m[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, ArraySpec]],\n\u001b[1;32m 116\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Iterable[CodecInput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]:\n\u001b[1;32m 117\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Decodes a batch of chunks.\u001b[39;00m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124;03m Chunks can be None in which case they are ignored by the codec.\u001b[39;00m\n\u001b[1;32m 119\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124;03m Iterable[CodecInput | None]\u001b[39;00m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m 
_batching_helper(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decode_single, chunks_and_specs)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:407\u001b[0m, in \u001b[0;36m_batching_helper\u001b[0;34m(func, batch_info)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_batching_helper\u001b[39m(\n\u001b[1;32m 404\u001b[0m func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]],\n\u001b[1;32m 405\u001b[0m batch_info: Iterable[\u001b[38;5;28mtuple\u001b[39m[CodecInput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, ArraySpec]],\n\u001b[1;32m 406\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mlist\u001b[39m[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]:\n\u001b[0;32m--> 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28mlist\u001b[39m(batch_info),\n\u001b[1;32m 409\u001b[0m _noop_for_none(func),\n\u001b[1;32m 410\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 411\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:69\u001b[0m, in \u001b[0;36mconcurrent_map\u001b[0;34m(items, func, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n\u001b[0;32m---> 69\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39m[asyncio\u001b[38;5;241m.\u001b[39mensure_future(run(item)) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m items])\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:67\u001b[0m, in \u001b[0;36mconcurrent_map..run\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun\u001b[39m(item: \u001b[38;5;28mtuple\u001b[39m[Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:420\u001b[0m, in \u001b[0;36m_noop_for_none..wrap\u001b[0;34m(chunk, chunk_spec)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(chunk, chunk_spec)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:148\u001b[0m, in \u001b[0;36m_NumcodecsBytesBytesCodec._decode_single\u001b[0;34m(self, chunk_bytes, chunk_spec)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_decode_single\u001b[39m(\u001b[38;5;28mself\u001b[39m, chunk_bytes: Buffer, chunk_spec: ArraySpec) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Buffer:\n\u001b[0;32m--> 148\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mto_thread(\n\u001b[1;32m 149\u001b[0m as_numpy_array_wrapper,\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_codec\u001b[38;5;241m.\u001b[39mdecode,\n\u001b[1;32m 151\u001b[0m chunk_bytes,\n\u001b[1;32m 152\u001b[0m chunk_spec\u001b[38;5;241m.\u001b[39mprototype,\n\u001b[1;32m 153\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/asyncio/threads.py:25\u001b[0m, in \u001b[0;36mto_thread\u001b[0;34m(func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m ctx \u001b[38;5;241m=\u001b[39m contextvars\u001b[38;5;241m.\u001b[39mcopy_context()\n\u001b[1;32m 24\u001b[0m func_call \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(ctx\u001b[38;5;241m.\u001b[39mrun, func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m loop\u001b[38;5;241m.\u001b[39mrun_in_executor(\u001b[38;5;28;01mNone\u001b[39;00m, func_call)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/buffer/cpu.py:213\u001b[0m, in \u001b[0;36mas_numpy_array_wrapper\u001b[0;34m(func, buf, prototype)\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mas_numpy_array_wrapper\u001b[39m(\n\u001b[1;32m 189\u001b[0m func: Callable[[npt\u001b[38;5;241m.\u001b[39mNDArray[Any]], \u001b[38;5;28mbytes\u001b[39m], buf: core\u001b[38;5;241m.\u001b[39mBuffer, prototype: core\u001b[38;5;241m.\u001b[39mBufferPrototype\n\u001b[1;32m 190\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m core\u001b[38;5;241m.\u001b[39mBuffer:\n\u001b[1;32m 191\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Converts the input of `func` to a numpy array and the output back to `Buffer`.\u001b[39;00m\n\u001b[1;32m 192\u001b[0m \n\u001b[1;32m 193\u001b[0m \u001b[38;5;124;03m This function is useful when calling a `func` that only support host memory such\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;124;03m The result of `func` converted to a `Buffer`\u001b[39;00m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prototype\u001b[38;5;241m.\u001b[39mbuffer\u001b[38;5;241m.\u001b[39mfrom_bytes(\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_numpy_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/numcodecs/zlib.py:37\u001b[0m, in \u001b[0;36mZlib.decode\u001b[0;34m(self, buf, out)\u001b[0m\n\u001b[1;32m 34\u001b[0m out \u001b[38;5;241m=\u001b[39m ensure_contiguous_ndarray(out)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# do decompression\u001b[39;00m\n\u001b[0;32m---> 37\u001b[0m dec \u001b[38;5;241m=\u001b[39m \u001b[43m_zlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecompress\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;66;03m# handle destination - Python standard library zlib 
module does not\u001b[39;00m\n\u001b[1;32m     40\u001b[0m \u001b[38;5;66;03m# support direct decompression into buffer, so we have to copy into\u001b[39;00m\n\u001b[1;32m     41\u001b[0m \u001b[38;5;66;03m# out if given\u001b[39;00m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ndarray_copy(dec, out)\n",
+      "\u001b[0;31merror\u001b[0m: Error -3 while decompressing data: incorrect header check"
+     ]
+    }
+   ],
+   "source": [
+    "subset = xds.sel(lat=lat_slice, lon=lon_slice, time='2025-07-01')\n",
+    "print(subset.analysed_sst.shape)\n",
+    "subset.analysed_sst.mean().values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4a3e767e-9f0e-4e47-ab68-7389e384e0ee",
+   "metadata": {},
+   "source": [
+    "### Get same value from original data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b6df651-9d2f-4987-bbce-eb9141974100",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = earthaccess.search_data(\n",
+    "    short_name='MUR-JPL-L4-GLOB-v4.1',\n",
+    "    temporal=(time_range[0] + \" 09:00:00\", time_range[1] + \" 09:00:00\"),\n",
+    ")\n",
+    "\n",
+    "direct_access_links = [granule.data_links(access=\"direct\")[0] for granule in results]\n",
+    "\n",
+    "fileset = earthaccess.open(direct_access_links, provider='POCLOUD')\n",
+    "\n",
+    "og_ds = xr.open_mfdataset(fileset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "4030a24b-e2b1-432b-b32f-ea29b59dd0bf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(29, 11, 11)\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "array(284.17548539)"
+       "array(285.61836207)"
       ]
      },
-     "execution_count": 45,
+     "execution_count": 49,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "xds.sel(lat=slice(47.6, 47.7), lon=slice(-122.4, -122.3)).analysed_sst.mean().values"
+    "og_subset = og_ds.sel(lat=lat_slice, lon=lon_slice, time=slice(*time_range))\n",
+    "print(og_subset.analysed_sst.shape)\n",
+    "og_subset.analysed_sst.mean().values"
    ]
   },
   {
@@ -114,7 +258,28 @@
    "id": "0cb434fa-f083-4b8f-8604-08bd2f0a66de",
    "metadata": {},
    "source": [
-    "# Deleting data"
+    "# Deleting data\n",
+    "\n",
+    "You can delete data directly if necessary, or remove previous commits.\n",
+    "\n",
+    "### Option 1: Resize the arrays to remove trailing timesteps.\n",
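+    "\n",
+    "Below is a minimal sketch of the resize approach (a sketch only, not the committed cell: it assumes the `repo` handle from the cells above, the four MUR data variables, and a hypothetical cutoff index `n_keep`):\n",
+    "\n",
+    "```python\n",
+    "import zarr\n",
+    "\n",
+    "session = repo.writable_session(branch=\"main\")\n",
+    "group = zarr.open_group(session.store, mode=\"a\")\n",
+    "n_keep = 400  # hypothetical: number of timesteps to keep\n",
+    "group[\"time\"].resize((n_keep,))\n",
+    "for name in [\"analysed_sst\", \"analysis_error\", \"mask\", \"sea_ice_fraction\"]:\n",
+    "    _, nlat, nlon = group[name].shape\n",
+    "    group[name].resize((n_keep, nlat, nlon))\n",
+    "# session.commit(\"Removed data for time > cutoff\")  # DANGER!\n",
+    "```\n",
+    "\n",
+    "The resize only takes effect when the session is committed; earlier commits still reference the dropped chunks, which is what makes Option 2 below possible."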
] }, { @@ -170,6 +318,74 @@ "# DANGER!\n", "# session.commit(\"Removed data for time > 2025-05-31\")" ] + }, + { + "cell_type": "markdown", + "id": "72ccfdcf-7a6b-4b97-aba8-02ade6f48847", + "metadata": {}, + "source": [ + "### Option 2: Reset to a previous commit.\n", + "\n", + "First list commits:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7ae9a196-49b5-4f83-9730-19612c3c7124", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Committed data for 2025-07-01 09:00:00 21:00:01 to 2025-07-03 21:00:00.',\n", + " 'TSWFKSA0JB7WJK52APSG'),\n", + " ('Committed data for 2025-06-30 09:00:00 21:00:01 to 2025-07-02 21:00:00.',\n", + " 'W96ECCA8SNV111SB6KG0'),\n", + " ('Committed data for 2025-06-29 09:00:00 21:00:01 to 2025-07-01 21:00:00.',\n", + " '7CPR33AM6TD4B9CNKBEG'),\n", + " ('Committed data for 2025-06-28 09:00:00 21:00:01 to 2025-06-30 21:00:00.',\n", + " '3WQJD66XCMSAKBVZAMF0'),\n", + " ('Committed data for 2025-06-27 09:00:00 21:00:01 to 2025-06-29 21:00:00.',\n", + " '8Y9Z9XA2B9VF3NVNJ1J0'),\n", + " ('Committed data for 2025-05-31 09:00:00 09:00:00 to 2025-06-28 09:00:00.',\n", + " '7PZK6744FDHXHKPYVX8G'),\n", + " ('Removed data for time > 2025-05-31', '44B9XPA2C0QH6FVT5AF0'),\n", + " ('Commit data 2024-10-01 to 2025-06-04', 'GPRDBT2XK9ZERYSQ7EA0'),\n", + " ('Commit data 2024-09-13 to 2024-09-30', 'CP5PHVT9V88VPZTZ0E00'),\n", + " ('Committed data for 2024-09-12 09:00:00 to 2024-09-12 09:00:00 using 20240912090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'W7Z0Y2FAGZ8WFPMJYZTG'),\n", + " ('Committed data for 2024-09-11 09:00:00 to 2024-09-11 09:00:00 using 20240911090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'QRB8HZE1WEK9AA1FWH00'),\n", + " ('Commit data 2024-09-05 to 2024-09-10', '3R6SDVDMWP0SVB6KW0ZG'),\n", + " ('Committed data for 2024-09-04 09:00:00 to 2024-09-04 09:00:00 using 20240904090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'MSX9YMGN8EPP3S2Z8K50'),\n", + " ('Committed data for 2024-09-01 09:00:00 to 2024-09-03 09:00:00 using 20240903090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',\n", + " 'SE5QYNGDA0KSQCKTHGQ0'),\n", + " ('Commit data 2024-08-02 to 2024-08-30', 'K31CHGA0N2FXWCNSJVDG'),\n", + " ('Commit data 2024-07-02 to 2024-07-31', 'Q37QBVDX4A58FPSY9190'),\n", + " ('Commit data 2024-06-02 to 2024-06-30', 'DWCSFZX4TPNA4SEREME0'),\n", + " ('Repository initialized', 'AYXEAVT6QYRBDXC1455G')]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[(ancestor.message, ancestor.id) for ancestor in repo.ancestry(branch=\"main\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a4214649-11b2-4a35-9e97-bed52a2e0dcd", + "metadata": {}, + "outputs": [], + "source": [ + "repo.reset_branch(\"main\", \"8Y9Z9XA2B9VF3NVNJ1J0\")" + ] } ], "metadata": { From 5cc5b5956729427a760c73b8dd2e91098a1388d3 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Thu, 3 Jul 2025 09:36:15 -0700 Subject: [PATCH 07/26] Bump memory and timeout --- cdk/mursst_stack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cdk/mursst_stack.py b/cdk/mursst_stack.py index f31f868..29e6a74 100644 --- a/cdk/mursst_stack.py +++ b/cdk/mursst_stack.py @@ -50,8 +50,8 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: environment={ "SECRET_ARN": "arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C" # Replace with your secret ARN }, - timeout=Duration.seconds(30), - 
memory_size=2048
+        timeout=Duration.seconds(600),
+        memory_size=10240
     )
 
     # Create SNS topic for notifications

From 91e1ee0b6686ba44a99a9794527ef6d1d90ba1c8 Mon Sep 17 00:00:00 2001
From: Julius Busecke
Date: Thu, 10 Jul 2025 18:49:55 +0000
Subject: [PATCH 08/26] Bunch of stuff that does not work

---
 cdk/lambda/lambda_function.py |   43 +-
 cdk/lambda/testing.ipynb      | 1520 +++++++++++++++++++++++++++++++--
 2 files changed, 1482 insertions(+), 81 deletions(-)

diff --git a/cdk/lambda/lambda_function.py b/cdk/lambda/lambda_function.py
index 2a2f5a0..a8ac93c 100644
--- a/cdk/lambda/lambda_function.py
+++ b/cdk/lambda/lambda_function.py
@@ -17,12 +17,38 @@
 drop_vars = ["dt_1km_data", "sst_anomaly"]
 collection_short_name = "MUR-JPL-L4-GLOB-v4.1"
 
+# refreshable earthdata credentials
+from typing import Dict
+
+def get_earthdata_creds() -> Dict[str, str]:
+    # assumes that username and password are available in the environment
+    # TODO: accommodate an rc file?
+    auth = earthaccess.login(strategy='environment')  # this does not create a netrc file...
+    if not auth.authenticated:
+        raise PermissionError('Could not authenticate using environment variables')
+    else:
+        print('Authenticated with Earthdata Login via environment variables')
+    creds = auth.get_s3_credentials(daac='PODAAC')
+    return creds
+
+from icechunk import S3StaticCredentials
+def get_icechunk_creds() -> S3StaticCredentials:
+    creds = get_earthdata_creds()
+    return S3StaticCredentials(
+        access_key_id=creds['accessKeyId'],
+        secret_access_key=creds['secretAccessKey'],
+        expires_after=datetime.fromisoformat(creds['expiration']),
+        session_token=creds['sessionToken']
+    )
+
 # 馃嵄 there is a lot of overlap between this and lithops code and icechunk-nasa code 馃
 def open_icechunk_repo(bucket_name: str, store_name: str, ea_creds: Optional[dict] = None):
     storage = icechunk.s3_storage(
         bucket=bucket_name,
         prefix=f"icechunk/{store_name}",
-        anonymous=False
+        anonymous=False,
+        from_env=True  # the store bucket cannot be authed with EDL credentials
+        # get_credentials=get_icechunk_creds,
     )
 
     config = icechunk.RepositoryConfig.default()
@@ -32,15 +58,13 @@ def open_icechunk_repo(bucket_name: str, store_name: str, ea_creds: Optional[dic
         storage=storage,
         config=config,
     )
+
     if ea_creds:
         earthdata_credentials = icechunk.containers_credentials(
-            s3=icechunk.s3_credentials(
-                access_key_id=ea_creds['accessKeyId'],
-                secret_access_key=ea_creds['secretAccessKey'],
-                session_token=ea_creds['sessionToken']
-            )
+            s3=icechunk.s3_refreshable_credentials(get_credentials=get_icechunk_creds),
         )
         repo_config['virtual_chunk_credentials'] = earthdata_credentials
+    print('repo_config', repo_config)
     return icechunk.Repository.open(**repo_config)
 
 def get_last_timestep(session: icechunk.Session) -> datetime:
@@ -125,10 +149,11 @@ def lambda_handler(event, context: dict = {}):
     """
     Update the icechunk store with the latest MUR-JPL-L4-GLOB-v4.1 data.
     """
+    # Reactivate this!
For now testing with manual injection # Fetch secrets - secrets = get_secret() - os.environ['EARTHDATA_USERNAME'] = secrets['EARTHDATA_USERNAME'] - os.environ['EARTHDATA_PASSWORD'] = secrets['EARTHDATA_PASSWORD'] + # secrets = get_secret() + # os.environ['EARTHDATA_USERNAME'] = secrets['EARTHDATA_USERNAME'] + # os.environ['EARTHDATA_PASSWORD'] = secrets['EARTHDATA_PASSWORD'] print(f"Received event: {json.dumps(event)}") result = write_to_icechunk_or_fail() diff --git a/cdk/lambda/testing.ipynb b/cdk/lambda/testing.ipynb index a1b8a4d..7b332e4 100644 --- a/cdk/lambda/testing.ipynb +++ b/cdk/lambda/testing.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "edcdd905-0700-4c3b-9da0-fa9e9e652b4e", "metadata": {}, "outputs": [], @@ -27,48 +27,135 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "416d2208-52de-4e56-a515-b0ea165ce1d5", "metadata": {}, "outputs": [], "source": [ - "os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", - "os.environ['DRY_RUN'] = \"true\"" + "# # os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", + "# os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-east-2:444055461661:secret:edl-login-julius-ogbYjL'\n", + "# os.environ['DRY_RUN'] = \"true\"\n", + "# TODO: Need to figure out secrets management, for now Ill ingest the env variables manually (see also commented out code in `lambda_function.py`\n" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", + "execution_count": null, + "id": "e85faa51-d917-4e91-a979-24bb12fe02c4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b18705da-75e1-43e7-93fd-767d391ebbe7", "metadata": {}, "outputs": [], "source": [ - "lambda_handler({})" + "# #Testing the store bucket with EDL auth (does not work)\n", + "\n", + "# from lambda_function import get_icechunk_creds\n", + "# test_creds = get_icechunk_creds()\n", + "# import fsspec\n", + "# fs = fsspec.filesystem('s3', key=test_creds.access_key_id, secret=test_creds.secret_access_key, token=test_creds.session_token)\n", + "# fs.ls('nasa-eodc-public')" ] }, { - "cell_type": "markdown", - "id": "7b0b213b-bfda-42e3-85ac-1b5250e89b6e", + "cell_type": "code", + "execution_count": 4, + "id": "8bb1a30d-e279-4692-ad8e-d25c696d9eed", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['nasa-eodc-public/ASTER_GDEM',\n", + " 'nasa-eodc-public/BlueFlux',\n", + " 'nasa-eodc-public/GPM_3IMERGDF.07',\n", + " 'nasa-eodc-public/MCD12Q1.061',\n", + " 'nasa-eodc-public/MUR-JPL-L4-GLOB-v4.1',\n", + " 'nasa-eodc-public/NLDAS',\n", + " 'nasa-eodc-public/NLDAS3',\n", + " 'nasa-eodc-public/NOAA_FCDR',\n", + " 'nasa-eodc-public/cmip6',\n", + " 'nasa-eodc-public/icechunk',\n", + " 'nasa-eodc-public/oco3',\n", + " 'nasa-eodc-public/sample_hdf5_files']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Testing the store" + "#Testing the store bucket with env auth (works)\n", + "import fsspec\n", + "fs = fsspec.filesystem('s3', )\n", + "fs.ls('nasa-eodc-public')" ] }, { "cell_type": "code", "execution_count": 5, - "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f", + "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", "metadata": {}, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", 
"text": [ - "Enter your Earthdata Login username: aimeeb\n", - "Enter your Earthdata password: 路路路路路路路路\n" + "Received event: {}\n", + "earthaccess.login()\n", + "earthaccess.get_s3_credentials\n", + "opening icechunk repo\n", + "repo_config {'storage': S3Storage(bucket=nasa-eodc-public, prefix=icechunk/icechunk/MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2, config=S3Options(region=None, endpoint_url=None, anonymous=false, allow_http=false, force_path_style=false)), 'config': RepositoryConfig(inline_chunk_threshold_bytes=None, get_partial_values_concurrency=None, compression=None, caching=None, storage=None, manifest=None), 'virtual_chunk_credentials': {'s3': }}\n" + ] + }, + { + "ename": "IcechunkError", + "evalue": " x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", s3_extended_request_id:\n \"WxhFQ0mjtcoqG/Dhr9/fAEtoRyzxOsqUtwxW7i2LmOM0y/07aWgq57Mae+eLOtV6SrWfMYAe0V8=\", aws_request_id: \"HZQ5GGGE7S2ABXYQ\" }\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIcechunkError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mlambda_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:159\u001b[0m, in \u001b[0;36mlambda_handler\u001b[0;34m(event, context)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;66;03m# Reactivate this! 
For now testing with manual injection\u001b[39;00m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;66;03m# Fetch secrets\u001b[39;00m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;66;03m# secrets = get_secret()\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;66;03m# os.environ['EARTHDATA_USERNAME'] = secrets['EARTHDATA_USERNAME']\u001b[39;00m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;66;03m# os.environ['EARTHDATA_PASSWORD'] = secrets['EARTHDATA_PASSWORD']\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReceived event: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjson\u001b[38;5;241m.\u001b[39mdumps(event)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mwrite_to_icechunk_or_fail\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatusCode\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m200\u001b[39m,\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbody\u001b[39m\u001b[38;5;124m'\u001b[39m: json\u001b[38;5;241m.\u001b[39mdumps(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSuccessfully processed messages: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 164\u001b[0m }\n", + "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:109\u001b[0m, in \u001b[0;36mwrite_to_icechunk_or_fail\u001b[0;34m()\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopening icechunk repo\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 108\u001b[0m \u001b[38;5;66;03m# check date is next datetime for the icechunk store or fail\u001b[39;00m\n\u001b[0;32m--> 109\u001b[0m repo \u001b[38;5;241m=\u001b[39m \u001b[43mopen_icechunk_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mea_creds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m session \u001b[38;5;241m=\u001b[39m repo\u001b[38;5;241m.\u001b[39mreadonly_session(branch\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 111\u001b[0m \u001b[38;5;66;03m# MUR SST granules have a temporal range of date 1 21:00:00 to date 2 21:00:00, e.g. granule 20240627090000 has datetime range of 2024-06-26 21:00:00:00 to 2024-06-27 21:00:00:00\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;66;03m# so granules overlap in time. 
\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# Here we increment the latest timestep of the icechunkstore by 1 minute to make sure we only get granules outside of the latest date covered by the icechunk store\u001b[39;00m\n", + "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:68\u001b[0m, in \u001b[0;36mopen_icechunk_repo\u001b[0;34m(bucket_name, store_name, ea_creds)\u001b[0m\n\u001b[1;32m 66\u001b[0m repo_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvirtual_chunk_credentials\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m earthdata_credentials\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrepo_config\u001b[39m\u001b[38;5;124m'\u001b[39m, repo_config)\n\u001b[0;32m---> 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43micechunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrepo_config\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/icechunk/repository.py:94\u001b[0m, in \u001b[0;36mRepository.open\u001b[0;34m(cls, storage, config, virtual_chunk_credentials)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m virtual_chunk_credentials: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, AnyCredential] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 68\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[1;32m 69\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m Open an existing Icechunk repository.\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124;03m An instance of the Repository class.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[0;32m---> 94\u001b[0m \u001b[43mPyRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 99\u001b[0m )\n", + "\u001b[0;31mIcechunkError\u001b[0m: x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: 
icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", s3_extended_request_id:\n \"WxhFQ0mjtcoqG/Dhr9/fAEtoRyzxOsqUtwxW7i2LmOM0y/07aWgq57Mae+eLOtV6SrWfMYAe0V8=\", aws_request_id: \"HZQ5GGGE7S2ABXYQ\" }\n" ] } ], + "source": [ + "lambda_handler({})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c44987c-d199-4003-91d7-d47561ae3e2a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "7b0b213b-bfda-42e3-85ac-1b5250e89b6e", + "metadata": {}, + "source": [ + "# Testing the store" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f", + "metadata": {}, + "outputs": [], "source": [ "earthaccess.login()\n", "ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')\n", @@ -80,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "e8f6b892-477d-43be-bf6c-9a720104ad37", "metadata": {}, "outputs": [], @@ -98,10 +185,68 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "605fc2c3-378c-4986-84b3-6fe2020c00dd", + "execution_count": 13, + "id": "deb01661-2544-4451-a0c2-5b5977a4cfa3", "metadata": {}, "outputs": [], + "source": [ + "import icechunk as ic\n", + "\n", + "\n", + "storage = ic.s3_storage(\n", + " bucket=bucket,\n", + " prefix=f\"icechunk/{store_name}\",\n", + " anonymous=False,\n", + " from_env=True # cannot auth with EDL\n", + ")\n", + "config = ic.RepositoryConfig.default()\n", + "config.set_virtual_chunk_container(ic.VirtualChunkContainer(\n", + "# ic.Repository.open(storage=storage)\n", + "\n", + "# config = icechunk.RepositoryConfig.default()\n", + "# config.set_virtual_chunk_container(icechunk.VirtualChunkContainer(\"s3\", \"s3://\", icechunk.s3_store(region=\"us-west-2\")))\n", + "\n", + "# repo_config = dict(\n", + "# storage=storage,\n", + "# config=config,\n", + "# )\n", + "\n", + "# if ea_creds:\n", + "# earthdata_credentials = icechunk.containers_credentials(\n", + "# s3=icechunk.s3_refreshable_credentials(get_credentials=get_icechunk_creds),\n", + "# )\n", + "# repo_config['virtual_chunk_credentials'] = earthdata_credentials\n", + "# print('repo_config', repo_config)\n", + "# return icechunk.Repository.open(**repo_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "605fc2c3-378c-4986-84b3-6fe2020c00dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "repo_config {'storage': S3Storage(bucket=nasa-eodc-public, prefix=icechunk/MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2, config=S3Options(region=None, endpoint_url=None, anonymous=false, allow_http=false, force_path_style=false)), 'config': RepositoryConfig(inline_chunk_threshold_bytes=None, get_partial_values_concurrency=None, compression=None, caching=None, storage=None, manifest=None), 'virtual_chunk_credentials': {'s3': }}\n" + ] + }, + { + "ename": "IcechunkError", + "evalue": " x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with 
name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", aws_request_id:\n \"XS0RHNT4Y0BWAPF4\", s3_extended_request_id: \"dHG0oc/Ok3Gv5peEsPT97TQrQPwpmCn+t4mB56Ucxf+8O5uOm5VRjirQk0BP3ss4+yTCu2qwTnU=\" }\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIcechunkError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m repo \u001b[38;5;241m=\u001b[39m \u001b[43mopen_icechunk_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mea_creds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m session \u001b[38;5;241m=\u001b[39m repo\u001b[38;5;241m.\u001b[39mreadonly_session(branch\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m xds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_zarr(session\u001b[38;5;241m.\u001b[39mstore, zarr_version\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, chunks\u001b[38;5;241m=\u001b[39m{}, consolidated\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:68\u001b[0m, in \u001b[0;36mopen_icechunk_repo\u001b[0;34m(bucket_name, store_name, ea_creds)\u001b[0m\n\u001b[1;32m 66\u001b[0m repo_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvirtual_chunk_credentials\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m earthdata_credentials\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrepo_config\u001b[39m\u001b[38;5;124m'\u001b[39m, repo_config)\n\u001b[0;32m---> 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43micechunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrepo_config\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/icechunk/repository.py:94\u001b[0m, in \u001b[0;36mRepository.open\u001b[0;34m(cls, storage, config, virtual_chunk_credentials)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m virtual_chunk_credentials: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, AnyCredential] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 68\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
Self:\n\u001b[1;32m 69\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m Open an existing Icechunk repository.\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124;03m An instance of the Repository class.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[0;32m---> 94\u001b[0m \u001b[43mPyRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 99\u001b[0m )\n", + "\u001b[0;31mIcechunkError\u001b[0m: x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", aws_request_id:\n \"XS0RHNT4Y0BWAPF4\", s3_extended_request_id: \"dHG0oc/Ok3Gv5peEsPT97TQrQPwpmCn+t4mB56Ucxf+8O5uOm5VRjirQk0BP3ss4+yTCu2qwTnU=\" }\n" + ] + } + ], "source": [ "repo = open_icechunk_repo(bucket, store_name, ea_creds)\n", "session = repo.readonly_session(branch=\"main\")\n", @@ -110,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "id": "bfa9d9d6-3f19-4788-bfe2-b1b5efa620c7", "metadata": {}, "outputs": [ @@ -120,7 +265,7 @@ "slice('2025-06-30', '2025-07-02', None)" ] }, - "execution_count": 16, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -131,7 +276,1232 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, + "id": "a3850a68-eee5-4133-a9e1-6d4b14bba0ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('2025-06-30 09:00:00', '2025-07-02 09:00:00')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(time_range[0] + \" 09:00:00\", time_range[1] + \" 09:00:00\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3140febe-9c40-404b-93b8-fb1fa4c01501", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 7TB\n",
+       "Dimensions:           (time: 403, lat: 17999, lon: 36000)\n",
+       "Coordinates:\n",
+       "  * lat               (lat) float32 72kB -89.99 -89.98 -89.97 ... 89.98 89.99\n",
+       "  * lon               (lon) float32 144kB -180.0 -180.0 -180.0 ... 180.0 180.0\n",
+       "  * time              (time) datetime64[ns] 3kB 2024-06-02T09:00:00 ... 2025-...\n",
+       "Data variables:\n",
+       "    analysed_sst      (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n",
+       "    analysis_error    (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n",
+       "    mask              (time, lat, lon) float32 1TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n",
+       "    sea_ice_fraction  (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n",
+       "Attributes: (12/47)\n",
+       "    Conventions:                CF-1.7\n",
+       "    title:                      Daily MUR SST, Interim near-real-time (nrt) p...\n",
+       "    summary:                    A merged, multi-sensor L4 Foundation SST anal...\n",
+       "    references:                 http://podaac.jpl.nasa.gov/Multi-scale_Ultra-...\n",
+       "    institution:                Jet Propulsion Laboratory\n",
+       "    history:                    near real time (nrt) version created at nomin...\n",
+       "    ...                         ...\n",
+       "    project:                    NASA Making Earth Science Data Records for Us...\n",
+       "    publisher_name:             GHRSST Project Office\n",
+       "    publisher_url:              http://www.ghrsst.org\n",
+       "    publisher_email:            ghrsst-po@nceo.ac.uk\n",
+       "    processing_level:           L4\n",
+       "    cdm_data_type:              grid
" + ], + "text/plain": [ + " Size: 7TB\n", + "Dimensions: (time: 403, lat: 17999, lon: 36000)\n", + "Coordinates:\n", + " * lat (lat) float32 72kB -89.99 -89.98 -89.97 ... 89.98 89.99\n", + " * lon (lon) float32 144kB -180.0 -180.0 -180.0 ... 180.0 180.0\n", + " * time (time) datetime64[ns] 3kB 2024-06-02T09:00:00 ... 2025-...\n", + "Data variables:\n", + " analysed_sst (time, lat, lon) float64 2TB dask.array\n", + " analysis_error (time, lat, lon) float64 2TB dask.array\n", + " mask (time, lat, lon) float32 1TB dask.array\n", + " sea_ice_fraction (time, lat, lon) float64 2TB dask.array\n", + "Attributes: (12/47)\n", + " Conventions: CF-1.7\n", + " title: Daily MUR SST, Interim near-real-time (nrt) p...\n", + " summary: A merged, multi-sensor L4 Foundation SST anal...\n", + " references: http://podaac.jpl.nasa.gov/Multi-scale_Ultra-...\n", + " institution: Jet Propulsion Laboratory\n", + " history: near real time (nrt) version created at nomin...\n", + " ... ...\n", + " project: NASA Making Earth Science Data Records for Us...\n", + " publisher_name: GHRSST Project Office\n", + " publisher_url: http://www.ghrsst.org\n", + " publisher_email: ghrsst-po@nceo.ac.uk\n", + " processing_level: L4\n", + " cdm_data_type: grid" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xds" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "c476af97-621c-4c52-a032-0a131573d1d4", "metadata": {}, "outputs": [ @@ -139,55 +1509,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "(1, 11, 11)\n" + "(0, 11, 11)\n" ] }, { - "ename": "error", - "evalue": "Error -3 while decompressing data: incorrect header check", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31merror\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[21], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m subset \u001b[38;5;241m=\u001b[39m xds\u001b[38;5;241m.\u001b[39msel(lat\u001b[38;5;241m=\u001b[39mlat_slice, lon\u001b[38;5;241m=\u001b[39mlon_slice, time\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2025-07-01\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(subset\u001b[38;5;241m.\u001b[39manalysed_sst\u001b[38;5;241m.\u001b[39mshape)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msubset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manalysed_sst\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmean\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/dataarray.py:815\u001b[0m, in \u001b[0;36mDataArray.values\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 802\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 803\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalues\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m 804\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;124;03m The array's data converted to numpy.ndarray.\u001b[39;00m\n\u001b[1;32m 806\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;124;03m to this array may be reflected in the DataArray as well.\u001b[39;00m\n\u001b[1;32m 814\u001b[0m \u001b[38;5;124;03m 
\"\"\"\u001b[39;00m\n\u001b[0;32m--> 815\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/variable.py:516\u001b[0m, in \u001b[0;36mVariable.values\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 513\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalues\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m 515\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"The variable's data as a numpy.ndarray\"\"\"\u001b[39;00m\n\u001b[0;32m--> 516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_as_array_or_item\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/variable.py:302\u001b[0m, in \u001b[0;36m_as_array_or_item\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_as_array_or_item\u001b[39m(data):\n\u001b[1;32m 289\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return the given values as a numpy array, or as an individual item if\u001b[39;00m\n\u001b[1;32m 290\u001b[0m \u001b[38;5;124;03m it's a 0d datetime64 or timedelta64 array.\u001b[39;00m\n\u001b[1;32m 291\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;124;03m TODO: remove this (replace with np.asarray) once these issues are fixed\u001b[39;00m\n\u001b[1;32m 301\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 302\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 304\u001b[0m kind \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mkind\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/dask/array/core.py:1724\u001b[0m, in \u001b[0;36mArray.__array__\u001b[0;34m(self, dtype, copy, **kwargs)\u001b[0m\n\u001b[1;32m 1717\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m 1718\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1719\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt acquire a memory view of a Dask array. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1720\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis will raise in the future.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1721\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 1722\u001b[0m )\n\u001b[0;32m-> 1724\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1726\u001b[0m \u001b[38;5;66;03m# Apply requested dtype and convert non-numpy backends to numpy.\u001b[39;00m\n\u001b[1;32m 1727\u001b[0m \u001b[38;5;66;03m# If copy is True, numpy is going to perform its own deep copy\u001b[39;00m\n\u001b[1;32m 1728\u001b[0m \u001b[38;5;66;03m# after this method returns.\u001b[39;00m\n\u001b[1;32m 1729\u001b[0m \u001b[38;5;66;03m# If copy is None, finalize() ensures that the returned object\u001b[39;00m\n\u001b[1;32m 1730\u001b[0m \u001b[38;5;66;03m# does not share memory with an object stored in the graph or on a\u001b[39;00m\n\u001b[1;32m 1731\u001b[0m \u001b[38;5;66;03m# process-local Worker.\u001b[39;00m\n\u001b[1;32m 1732\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(x, dtype\u001b[38;5;241m=\u001b[39mdtype)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/dask/base.py:373\u001b[0m, in \u001b[0;36mDaskMethodsMixin.compute\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 350\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Compute this dask collection\u001b[39;00m\n\u001b[1;32m 351\u001b[0m \n\u001b[1;32m 352\u001b[0m \u001b[38;5;124;03m This turns a lazy Dask collection into its in-memory equivalent.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 371\u001b[0m \u001b[38;5;124;03m dask.compute\u001b[39;00m\n\u001b[1;32m 372\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 373\u001b[0m (result,) \u001b[38;5;241m=\u001b[39m \u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraverse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/dask/base.py:681\u001b[0m, in \u001b[0;36mcompute\u001b[0;34m(traverse, optimize_graph, scheduler, get, *args, **kwargs)\u001b[0m\n\u001b[1;32m 678\u001b[0m expr \u001b[38;5;241m=\u001b[39m expr\u001b[38;5;241m.\u001b[39moptimize()\n\u001b[1;32m 679\u001b[0m keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(flatten(expr\u001b[38;5;241m.\u001b[39m__dask_keys__()))\n\u001b[0;32m--> 681\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mschedule\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 683\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m repack(results)\n", - "File 
\u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:575\u001b[0m, in \u001b[0;36mImplicitToExplicitIndexingAdapter.__array__\u001b[0;34m(self, dtype, copy)\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__array__\u001b[39m(\n\u001b[1;32m 572\u001b[0m \u001b[38;5;28mself\u001b[39m, dtype: np\u001b[38;5;241m.\u001b[39mtyping\u001b[38;5;241m.\u001b[39mDTypeLike \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m/\u001b[39m, \u001b[38;5;241m*\u001b[39m, copy: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 573\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m 574\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m Version(np\u001b[38;5;241m.\u001b[39m__version__) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m Version(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2.0.0\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 575\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, dtype\u001b[38;5;241m=\u001b[39mdtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 576\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 577\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_duck_array(), dtype\u001b[38;5;241m=\u001b[39mdtype)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:580\u001b[0m, in \u001b[0;36mImplicitToExplicitIndexingAdapter.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_duck_array\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:791\u001b[0m, in \u001b[0;36mCopyOnWriteArray.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_duck_array\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 791\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:661\u001b[0m, in \u001b[0;36mLazilyIndexedArray.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[38;5;66;03m# self.array[self.key] is now a numpy array when\u001b[39;00m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;66;03m# self.array is a BackendArray subclass\u001b[39;00m\n\u001b[1;32m 658\u001b[0m \u001b[38;5;66;03m# and self.key is BasicIndexer((slice(None, None, None),))\u001b[39;00m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;66;03m# so we need the explicit check for ExplicitlyIndexed\u001b[39;00m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(array, ExplicitlyIndexed):\n\u001b[0;32m--> 661\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _wrap_numpy_scalars(array)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/coding/common.py:76\u001b[0m, in \u001b[0;36m_ElementwiseFunctionArray.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_duck_array\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfunc(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/coding/common.py:76\u001b[0m, in \u001b[0;36m_ElementwiseFunctionArray.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_duck_array\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfunc(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_duck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:654\u001b[0m, in \u001b[0;36mLazilyIndexedArray.get_duck_array\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 650\u001b[0m array \u001b[38;5;241m=\u001b[39m apply_indexer(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39marray, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkey)\n\u001b[1;32m 651\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 652\u001b[0m \u001b[38;5;66;03m# If the array is not an ExplicitlyIndexedNDArrayMixin,\u001b[39;00m\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# it may wrap a BackendArray so use its __getitem__\u001b[39;00m\n\u001b[0;32m--> 654\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 656\u001b[0m \u001b[38;5;66;03m# self.array[self.key] is now a numpy array when\u001b[39;00m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;66;03m# self.array is a BackendArray subclass\u001b[39;00m\n\u001b[1;32m 658\u001b[0m \u001b[38;5;66;03m# and self.key is BasicIndexer((slice(None, None, None),))\u001b[39;00m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;66;03m# so we need the explicit check for ExplicitlyIndexed\u001b[39;00m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(array, ExplicitlyIndexed):\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/backends/zarr.py:223\u001b[0m, in \u001b[0;36mZarrArrayWrapper.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, indexing\u001b[38;5;241m.\u001b[39mOuterIndexer):\n\u001b[1;32m 222\u001b[0m method 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_oindex\n\u001b[0;32m--> 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mindexing\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexplicit_indexing_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexing\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexingSupport\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mVECTORIZED\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/core/indexing.py:1015\u001b[0m, in \u001b[0;36mexplicit_indexing_adapter\u001b[0;34m(key, shape, indexing_support, raw_indexing_method)\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Support explicit indexing by delegating to a raw indexing method.\u001b[39;00m\n\u001b[1;32m 994\u001b[0m \n\u001b[1;32m 995\u001b[0m \u001b[38;5;124;03mOuter and/or vectorized indexers are supported by indexing a second time\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;124;03mIndexing result, in the form of a duck numpy-array.\u001b[39;00m\n\u001b[1;32m 1013\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1014\u001b[0m raw_key, numpy_indices \u001b[38;5;241m=\u001b[39m decompose_indexer(key, shape, indexing_support)\n\u001b[0;32m-> 1015\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mraw_indexing_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mraw_key\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtuple\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m numpy_indices\u001b[38;5;241m.\u001b[39mtuple:\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;66;03m# index the loaded duck array\u001b[39;00m\n\u001b[1;32m 1018\u001b[0m indexable \u001b[38;5;241m=\u001b[39m as_indexable(result)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/xarray/backends/zarr.py:213\u001b[0m, in \u001b[0;36mZarrArrayWrapper._getitem\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_getitem\u001b[39m(\u001b[38;5;28mself\u001b[39m, key):\n\u001b[0;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_array\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/array.py:2441\u001b[0m, in \u001b[0;36mArray.__getitem__\u001b[0;34m(self, selection)\u001b[0m\n\u001b[1;32m 2439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvindex[cast(CoordinateSelection \u001b[38;5;241m|\u001b[39m MaskSelection, selection)]\n\u001b[1;32m 2440\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_pure_orthogonal_indexing(pure_selection, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim):\n\u001b[0;32m-> 2441\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_orthogonal_selection\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpure_selection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfields\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfields\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2442\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2443\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_basic_selection(cast(BasicSelection, pure_selection), fields\u001b[38;5;241m=\u001b[39mfields)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/_compat.py:43\u001b[0m, in \u001b[0;36m_deprecate_positional_args.._inner_deprecate_positional_args..inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m extra_args \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mlen\u001b[39m(all_args)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extra_args \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# extra_args > 0\u001b[39;00m\n\u001b[1;32m 46\u001b[0m args_msg \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 47\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00marg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(kwonly_args[:extra_args], args[\u001b[38;5;241m-\u001b[39mextra_args:], strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 49\u001b[0m ]\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/array.py:2883\u001b[0m, in \u001b[0;36mArray.get_orthogonal_selection\u001b[0;34m(self, selection, out, fields, prototype)\u001b[0m\n\u001b[1;32m 2881\u001b[0m prototype \u001b[38;5;241m=\u001b[39m default_buffer_prototype()\n\u001b[1;32m 2882\u001b[0m indexer \u001b[38;5;241m=\u001b[39m OrthogonalIndexer(selection, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshape, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mchunk_grid)\n\u001b[0;32m-> 2883\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2884\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_async_array\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_selection\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2885\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfields\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfields\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mprototype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprototype\u001b[49m\n\u001b[1;32m 2886\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2887\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/sync.py:163\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coro, loop, timeout)\u001b[0m\n\u001b[1;32m 160\u001b[0m return_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(finished))\u001b[38;5;241m.\u001b[39mresult()\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/sync.py:119\u001b[0m, in \u001b[0;36m_runner\u001b[0;34m(coro)\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124;03mAwait a coroutine and return the result of running it. If awaiting the coroutine raises an\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;124;03mexception, the exception will be returned.\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ex\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/array.py:1298\u001b[0m, in \u001b[0;36mAsyncArray._get_selection\u001b[0;34m(self, indexer, prototype, out, fields)\u001b[0m\n\u001b[1;32m 1295\u001b[0m _config \u001b[38;5;241m=\u001b[39m replace(_config, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39morder)\n\u001b[1;32m 1297\u001b[0m \u001b[38;5;66;03m# reading chunks and decoding them\u001b[39;00m\n\u001b[0;32m-> 1298\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcodec_pipeline\u001b[38;5;241m.\u001b[39mread(\n\u001b[1;32m 1299\u001b[0m [\n\u001b[1;32m 1300\u001b[0m (\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_path \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mencode_chunk_key(chunk_coords),\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mget_chunk_spec(chunk_coords, _config, prototype\u001b[38;5;241m=\u001b[39mprototype),\n\u001b[1;32m 1303\u001b[0m chunk_selection,\n\u001b[1;32m 1304\u001b[0m out_selection,\n\u001b[1;32m 1305\u001b[0m is_complete_chunk,\n\u001b[1;32m 1306\u001b[0m )\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk_coords, chunk_selection, out_selection, is_complete_chunk \u001b[38;5;129;01min\u001b[39;00m indexer\n\u001b[1;32m 1308\u001b[0m ],\n\u001b[1;32m 1309\u001b[0m out_buffer,\n\u001b[1;32m 1310\u001b[0m 
drop_axes\u001b[38;5;241m=\u001b[39mindexer\u001b[38;5;241m.\u001b[39mdrop_axes,\n\u001b[1;32m 1311\u001b[0m )\n\u001b[1;32m 1312\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(indexer, BasicIndexer) \u001b[38;5;129;01mand\u001b[39;00m indexer\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m ():\n\u001b[1;32m 1313\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m out_buffer\u001b[38;5;241m.\u001b[39mas_scalar()\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/codec_pipeline.py:464\u001b[0m, in \u001b[0;36mBatchedCodecPipeline.read\u001b[0;34m(self, batch_info, out, drop_axes)\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread\u001b[39m(\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 460\u001b[0m batch_info: Iterable[\u001b[38;5;28mtuple\u001b[39m[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, \u001b[38;5;28mbool\u001b[39m]],\n\u001b[1;32m 461\u001b[0m out: NDBuffer,\n\u001b[1;32m 462\u001b[0m drop_axes: \u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m] \u001b[38;5;241m=\u001b[39m (),\n\u001b[1;32m 463\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 464\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 465\u001b[0m [\n\u001b[1;32m 466\u001b[0m (single_batch_info, out, drop_axes)\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m single_batch_info \u001b[38;5;129;01min\u001b[39;00m batched(batch_info, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_size)\n\u001b[1;32m 468\u001b[0m ],\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mread_batch,\n\u001b[1;32m 470\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 471\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:69\u001b[0m, in \u001b[0;36mconcurrent_map\u001b[0;34m(items, func, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n\u001b[0;32m---> 69\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39m[asyncio\u001b[38;5;241m.\u001b[39mensure_future(run(item)) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m items])\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:67\u001b[0m, in \u001b[0;36mconcurrent_map..run\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun\u001b[39m(item: \u001b[38;5;28mtuple\u001b[39m[Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/codec_pipeline.py:270\u001b[0m, 
in \u001b[0;36mBatchedCodecPipeline.read_batch\u001b[0;34m(self, batch_info, out, drop_axes)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 265\u001b[0m chunk_bytes_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 266\u001b[0m [(byte_getter, array_spec\u001b[38;5;241m.\u001b[39mprototype) \u001b[38;5;28;01mfor\u001b[39;00m byte_getter, array_spec, \u001b[38;5;241m*\u001b[39m_ \u001b[38;5;129;01min\u001b[39;00m batch_info],\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m byte_getter, prototype: byte_getter\u001b[38;5;241m.\u001b[39mget(prototype),\n\u001b[1;32m 268\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 269\u001b[0m )\n\u001b[0;32m--> 270\u001b[0m chunk_array_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecode_batch(\n\u001b[1;32m 271\u001b[0m [\n\u001b[1;32m 272\u001b[0m (chunk_bytes, chunk_spec)\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk_bytes, (_, chunk_spec, \u001b[38;5;241m*\u001b[39m_) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\n\u001b[1;32m 274\u001b[0m chunk_bytes_batch, batch_info, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 275\u001b[0m )\n\u001b[1;32m 276\u001b[0m ],\n\u001b[1;32m 277\u001b[0m )\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\n\u001b[1;32m 279\u001b[0m chunk_array_batch, batch_info, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 280\u001b[0m ):\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk_array \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/codec_pipeline.py:185\u001b[0m, in \u001b[0;36mBatchedCodecPipeline.decode_batch\u001b[0;34m(self, chunk_bytes_and_specs)\u001b[0m\n\u001b[1;32m 178\u001b[0m (\n\u001b[1;32m 179\u001b[0m aa_codecs_with_spec,\n\u001b[1;32m 180\u001b[0m ab_codec_with_spec,\n\u001b[1;32m 181\u001b[0m bb_codecs_with_spec,\n\u001b[1;32m 182\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_codecs_with_resolved_metadata_batched(chunk_specs)\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m bb_codec, chunk_spec_batch \u001b[38;5;129;01min\u001b[39;00m bb_codecs_with_spec[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]:\n\u001b[0;32m--> 185\u001b[0m chunk_bytes_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m bb_codec\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28mzip\u001b[39m(chunk_bytes_batch, chunk_spec_batch, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 187\u001b[0m )\n\u001b[1;32m 189\u001b[0m ab_codec, chunk_spec_batch \u001b[38;5;241m=\u001b[39m ab_codec_with_spec\n\u001b[1;32m 190\u001b[0m chunk_array_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m ab_codec\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28mzip\u001b[39m(chunk_bytes_batch, chunk_spec_batch, 
strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 192\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:129\u001b[0m, in \u001b[0;36mBaseCodec.decode\u001b[0;34m(self, chunks_and_specs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 115\u001b[0m chunks_and_specs: Iterable[\u001b[38;5;28mtuple\u001b[39m[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, ArraySpec]],\n\u001b[1;32m 116\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Iterable[CodecInput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]:\n\u001b[1;32m 117\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Decodes a batch of chunks.\u001b[39;00m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124;03m Chunks can be None in which case they are ignored by the codec.\u001b[39;00m\n\u001b[1;32m 119\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124;03m Iterable[CodecInput | None]\u001b[39;00m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m _batching_helper(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decode_single, chunks_and_specs)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:407\u001b[0m, in \u001b[0;36m_batching_helper\u001b[0;34m(func, batch_info)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_batching_helper\u001b[39m(\n\u001b[1;32m 404\u001b[0m func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]],\n\u001b[1;32m 405\u001b[0m batch_info: Iterable[\u001b[38;5;28mtuple\u001b[39m[CodecInput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, ArraySpec]],\n\u001b[1;32m 406\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mlist\u001b[39m[CodecOutput \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m]:\n\u001b[0;32m--> 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m concurrent_map(\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28mlist\u001b[39m(batch_info),\n\u001b[1;32m 409\u001b[0m _noop_for_none(func),\n\u001b[1;32m 410\u001b[0m config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync.concurrency\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 411\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:69\u001b[0m, in \u001b[0;36mconcurrent_map\u001b[0;34m(items, func, limit)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n\u001b[0;32m---> 69\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39m[asyncio\u001b[38;5;241m.\u001b[39mensure_future(run(item)) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m items])\n", - "File 
\u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/common.py:67\u001b[0m, in \u001b[0;36mconcurrent_map..run\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun\u001b[39m(item: \u001b[38;5;28mtuple\u001b[39m[Any]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m V:\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m sem:\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(\u001b[38;5;241m*\u001b[39mitem)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/abc/codec.py:420\u001b[0m, in \u001b[0;36m_noop_for_none..wrap\u001b[0;34m(chunk, chunk_spec)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m func(chunk, chunk_spec)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:148\u001b[0m, in \u001b[0;36m_NumcodecsBytesBytesCodec._decode_single\u001b[0;34m(self, chunk_bytes, chunk_spec)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_decode_single\u001b[39m(\u001b[38;5;28mself\u001b[39m, chunk_bytes: Buffer, chunk_spec: ArraySpec) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Buffer:\n\u001b[0;32m--> 148\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mto_thread(\n\u001b[1;32m 149\u001b[0m as_numpy_array_wrapper,\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_codec\u001b[38;5;241m.\u001b[39mdecode,\n\u001b[1;32m 151\u001b[0m chunk_bytes,\n\u001b[1;32m 152\u001b[0m chunk_spec\u001b[38;5;241m.\u001b[39mprototype,\n\u001b[1;32m 153\u001b[0m )\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/asyncio/threads.py:25\u001b[0m, in \u001b[0;36mto_thread\u001b[0;34m(func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m ctx \u001b[38;5;241m=\u001b[39m contextvars\u001b[38;5;241m.\u001b[39mcopy_context()\n\u001b[1;32m 24\u001b[0m func_call \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(ctx\u001b[38;5;241m.\u001b[39mrun, func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m loop\u001b[38;5;241m.\u001b[39mrun_in_executor(\u001b[38;5;28;01mNone\u001b[39;00m, func_call)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/zarr/core/buffer/cpu.py:213\u001b[0m, in \u001b[0;36mas_numpy_array_wrapper\u001b[0;34m(func, buf, prototype)\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mas_numpy_array_wrapper\u001b[39m(\n\u001b[1;32m 189\u001b[0m func: Callable[[npt\u001b[38;5;241m.\u001b[39mNDArray[Any]], \u001b[38;5;28mbytes\u001b[39m], buf: core\u001b[38;5;241m.\u001b[39mBuffer, prototype: core\u001b[38;5;241m.\u001b[39mBufferPrototype\n\u001b[1;32m 190\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m core\u001b[38;5;241m.\u001b[39mBuffer:\n\u001b[1;32m 191\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Converts the 
input of `func` to a numpy array and the output back to `Buffer`.\u001b[39;00m\n\u001b[1;32m 192\u001b[0m \n\u001b[1;32m 193\u001b[0m \u001b[38;5;124;03m This function is useful when calling a `func` that only support host memory such\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;124;03m The result of `func` converted to a `Buffer`\u001b[39;00m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prototype\u001b[38;5;241m.\u001b[39mbuffer\u001b[38;5;241m.\u001b[39mfrom_bytes(\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_numpy_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/numcodecs/zlib.py:37\u001b[0m, in \u001b[0;36mZlib.decode\u001b[0;34m(self, buf, out)\u001b[0m\n\u001b[1;32m 34\u001b[0m out \u001b[38;5;241m=\u001b[39m ensure_contiguous_ndarray(out)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# do decompression\u001b[39;00m\n\u001b[0;32m---> 37\u001b[0m dec \u001b[38;5;241m=\u001b[39m \u001b[43m_zlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecompress\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;66;03m# handle destination - Python standard library zlib module does not\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;66;03m# support direct decompression into buffer, so we have to copy into\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;66;03m# out if given\u001b[39;00m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ndarray_copy(dec, out)\n", - "\u001b[0;31merror\u001b[0m: Error -3 while decompressing data: incorrect header check" - ] + "data": { + "text/plain": [ + "array(nan)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -206,10 +1539,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "1b6df651-9d2f-4987-bbce-eb9141974100", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e955ad6ea616405abac54f2961807f45", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "QUEUEING TASKS | : 0%| | 0/3 [00:00 Date: Fri, 11 Jul 2025 15:24:06 +0000 Subject: [PATCH 09/26] Added branching + tests --- cdk/lambda/lambda_function.py | 207 ++++++--- cdk/lambda/testing.ipynb | 838 ++++++++++++++++++++-------------- 2 files changed, 651 insertions(+), 394 deletions(-) diff --git a/cdk/lambda/lambda_function.py b/cdk/lambda/lambda_function.py index a8ac93c..5787ada 100644 --- a/cdk/lambda/lambda_function.py +++ b/cdk/lambda/lambda_function.py @@ -4,7 +4,7 @@ import icechunk import boto3 import os -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import virtualizarr as vz import zarr import numpy as np @@ -16,24 +16,42 @@ store_name = "MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2" drop_vars = ["dt_1km_data", "sst_anomaly"] collection_short_name = "MUR-JPL-L4-GLOB-v4.1" +#TODO can I name this based on some id for the lambda? 
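+# (one option, not wired up here: AWS Lambda exposes a per-invocation id as
+#  context.aws_request_id, but the handler would have to pass `context` down
+#  since branchname is computed at import time)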
+# for now let's just use time
+branchname = f"add_time_{datetime.now(timezone.utc).isoformat()}"
 
-# refreshable earthdata credentials
-from typing import Dict
+def get_secret():
+    secret_name = os.environ['SECRET_ARN']
+    session = boto3.session.Session()
+    client = session.client(
+        service_name='secretsmanager',
+        region_name=session.region_name
+    )
+
+    try:
+        get_secret_value_response = client.get_secret_value(
+            SecretId=secret_name
+        )
+    except Exception as e:
+        raise e
+    else:
+        if 'SecretString' in get_secret_value_response:
+            return json.loads(get_secret_value_response['SecretString'])
+        else:
+            raise ValueError("Secret is not a string")
 
-def get_earthdata_creds() -> Dict[str, str]:
+# refreshable earthdata credentials
+from icechunk import S3StaticCredentials
+def get_icechunk_creds(daac: str = None) -> S3StaticCredentials:
+    if daac is None:
+        daac = 'PODAAC' # TODO: Might want to change this for a more general version
+    # https://github.com/nsidc/earthaccess/discussions/1051 could help here.
     # assumes that username and password are available in the environment
     # TODO: accommodate rc file?
     auth = earthaccess.login(strategy='environment') # this does not create a netrc file...
     if not auth.authenticated:
         raise PermissionError('Could not authenticate using environment variables')
-    else:
-        print
-    creds = auth.get_s3_credentials(daac='PODAAC')
-    return creds
-
-from icechunk import S3StaticCredentials
-def get_icechunk_creds() -> S3StaticCredentials:
-    creds = get_earthdata_creds()
+    creds = auth.get_s3_credentials(daac=daac)
     return S3StaticCredentials(
         access_key_id=creds['accessKeyId'],
         secret_access_key=creds['secretAccessKey'],
@@ -42,7 +60,8 @@
 )
 
 # there is a lot of overlap between this and lithops code and icechunk-nasa code
-def open_icechunk_repo(bucket_name: str, store_name: str, ea_creds: Optional[dict] = None):
+def open_icechunk_repo(bucket_name: str, store_name: str):
+    print("opening icechunk repo")
     storage = icechunk.s3_storage(
         bucket=bucket_name,
         prefix=f"icechunk/{store_name}",
@@ -58,16 +77,15 @@
         storage=storage,
         config=config,
     )
-
-    if ea_creds:
-        earthdata_credentials = icechunk.containers_credentials(
-            s3=icechunk.s3_refreshable_credentials(get_credentials=get_icechunk_creds),
-        )
-        repo_config['virtual_chunk_credentials'] = earthdata_credentials
-    print('repo_config', repo_config)
+
+    virtual_credentials = icechunk.containers_credentials(
+        s3=icechunk.s3_refreshable_credentials(get_credentials=get_icechunk_creds),
+    )
+    repo_config['virtual_chunk_credentials'] = virtual_credentials
     return icechunk.Repository.open(**repo_config)
 
 def get_last_timestep(session: icechunk.Session) -> datetime:
+    print("Getting last timestep")
     # get the last timestep from the icechunk store
     # return the last timestep
     zarr_store = zarr.open(session.store, mode="r")
@@ -75,7 +93,8 @@
     dt_array = np.array([epoch + timedelta(seconds=int(t)) for t in zarr_store['time'][:]])
     return dt_array[-1]
 
+
-def write_to_icechunk(repo: icechunk.Repository, granule_results: list[DataGranule], start_date: str, end_date: str):
+def write_to_icechunk_branch(repo: icechunk.Repository, granule_results: list[DataGranule]) -> str:
     print("opening virtual dataset")
     vds = earthaccess.open_virtual_mfdataset(
         granule_results,
@@ -87,69 +106,141 @@
         combine_attrs="override",
         parallel=False,
     )
+    print(f"New Data (Virtual): {vds}")
     # write to the icechunk store
     vds = vds.drop_vars(drop_vars, errors="ignore")
-    print("writing to icechunk")
-    commit_message = f"Committed data for {start_date} to {end_date}."
+    print(f"Creating branch: {branchname}")
+    repo.create_branch(
+        branchname,
+        snapshot_id=repo.lookup_branch("main") # branches off the latest commit to main!
+    )
+
+    print(f"writing to icechunk branch {branchname}")
+    # get the time range from the granules
+    start_time = min([g['umm']['TemporalExtent']['RangeDateTime']['BeginningDateTime'] for g in granule_results])
+    end_time = max([g['umm']['TemporalExtent']['RangeDateTime']['EndingDateTime'] for g in granule_results])
+
+    commit_message = f"Committed data for {start_time} to {end_time}."
+
+    session = repo.writable_session(branch=branchname)
+    vds.virtualize.to_icechunk(session.store, append_dim='time')
+
+    snapshot = session.commit(commit_message)
+    print(f"Commit successful. {snapshot} | {commit_message}")
+    return snapshot
+
+def open_xr_dataset_from_branch(repo: icechunk.Repository, branch: str):
+    session = repo.readonly_session(branch=branch)
+    ds = xr.open_zarr(session.store, consolidated=False)
+    return ds
+
+
+def test_store_on_branch(
+    repo: icechunk.Repository,
+    granule_results: list[DataGranule]
+):
+    ds = open_xr_dataset_from_branch(repo, branchname)
+    nt = len(granule_results)
+
+    # Test 1: time continuity
+    try:
+        # the expected timestep is taken from the first two timesteps in the store;
+        # compare raw values so xarray alignment does not reduce this to an empty check
+        dt_expected = ds.time.isel(time=slice(0, 2)).diff('time').values
+        # include the last pre-append timestep so the step across the append boundary is checked too
+        dt_actual = ds.time.isel(time=slice(-nt - 1, None)).diff('time').values
+        time_continuity = bool((dt_actual == dt_expected).all())
+    except Exception as e:
+        time_continuity = False
+        time_continuity_error = str(e)
+    else:
+        time_continuity_error = None
+
+    # Test 2: data equality
+    try:
+        direct_access_links = [granule.data_links(access="direct")[0] for granule in granule_results]
+        fileset = earthaccess.open(direct_access_links, provider='POCLOUD')
+        ds_original = xr.open_mfdataset(fileset).drop_vars(drop_vars, errors="ignore")
+        # xr.testing.assert_allclose(ds_original, ds.isel(time=slice(-nt, None)))
+        # for testing - TODO: Reset
+        xr.testing.assert_allclose(ds_original.isel(time=-1).mean(), ds.isel(time=-1).mean())
+        data_equal = True
+    except AssertionError as e:
+        data_equal = False
+        data_equal_error = str(e)
+    except Exception as e:
+        data_equal = False
+        data_equal_error = f"Unexpected error during data comparison: {e}"
+    else:
+        data_equal_error = None
+
+    # Compose result
+    tests_passed = time_continuity and data_equal
+
+    if not tests_passed:
+        error_message = "Failures:\n"
+        if not time_continuity:
+            error_message += f"- Time continuity failed: {time_continuity_error or 'Mismatch in timestep differences'}\n"
+        if not data_equal:
+            error_message += f"- Data equality failed: {data_equal_error}\n"
+    else:
+        error_message = None
+
+    return tests_passed, error_message
+
+
+def merge_into_main(repo: icechunk.Repository):
     if os.environ.get("DRY_RUN", "false") == "true":
-        print(f"Dry run, skipping write to icechunk: {commit_message}")
-        return commit_message
+        print(f"Dry run, not merging {branchname} into main")
     else:
-        session = repo.writable_session(branch="main")
-        vds.virtualize.to_icechunk(session.store, append_dim='time')
-        return session.commit(commit_message)
+        # append branch commit to main branch and delete test branch
+        repo.reset_branch('main', repo.lookup_branch(branchname))
+    # always delete extra branch
+    #TODO: The hub does not allow us to delete objects!
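+    # (note: icechunk keeps branch refs as objects under the repo prefix,
+    #  so deleting a branch needs s3 delete permissions on the bucket)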
+    # repo.delete_branch(branchname)
 
-def write_to_icechunk_or_fail():
-    print("earthaccess.login()")
-    earthaccess.login()
-    print("earthaccess.get_s3_credentials")
-    ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')
-    print("opening icechunk repo")
-    # check date is next datetime for the icechunk store or fail
-    repo = open_icechunk_repo(bucket, store_name, ea_creds)
+def find_granules(repo: icechunk.Repository):
     session = repo.readonly_session(branch="main")
-    # MUR SST granules have a temporal range of date 1 21:00:00 to date 2 21:00:00, e.g. granule 20240627090000 has datetime range of 2024-06-26 21:00:00:00 to 2024-06-27 21:00:00:00
+    # MUR SST granules have a temporal range of date 1 21:00:00 to date 2 21:00:00,
+    # e.g. granule 20240627090000 has a datetime range of 2024-06-26 21:00:00 to 2024-06-27 21:00:00
     # so granules overlap in time.
-    # Here we increment the latest timestep of the icechunkstore by 1 minute to make sure we only get granules outside of the latest date covered by the icechunk store
+    # Here we increment the latest timestep of the icechunk store by 1 second
+    # to make sure we only get granules outside of the latest date covered by the icechunk store
    last_timestep = str(get_last_timestep(session)) + " 21:00:01"
    print("Searching for granules")
    current_date = str(datetime.now().date()) + " 21:00:00"
    granule_results = earthaccess.search_data(
        temporal=(last_timestep, current_date), short_name=collection_short_name
    )
+
    if len(granule_results) == 0:
        print("No granules found")
        return None
    else:
        print(f"Number of granules found: {len(granule_results)}")
-        # write to the icechunk store
-        return write_to_icechunk(repo, granule_results, start_date=last_timestep, end_date=current_date)
+        return granule_results
 
-def get_secret():
-    secret_name = os.environ['SECRET_ARN']
-    session = boto3.session.Session()
-    client = session.client(
-        service_name='secretsmanager',
-        region_name=session.region_name
-    )
 
-    try:
-        get_secret_value_response = client.get_secret_value(
-            SecretId=secret_name
-        )
-    except Exception as e:
-        raise e
-    else:
-        if 'SecretString' in get_secret_value_response:
-            return json.loads(get_secret_value_response['SecretString'])
+def write_to_icechunk_or_fail():
+    repo = open_icechunk_repo(bucket, store_name)
+    granule_results = find_granules(repo)
+
+    # find_granules returns None when there is nothing new, so guard on truthiness
+    if granule_results:
+        write_to_icechunk_branch(repo, granule_results)
+        passed, message = test_store_on_branch(repo, granule_results)
+        if not passed:
+            print(f'Tests did not pass with: {message}')
+            return message
        else:
-            raise ValueError("Secret is not a string")
+            print('Tests passed. Merging new data into main branch.')
+            merge_into_main(repo)
+            return repo.lookup_branch('main')
+    else:
+        return None
+
 
 def lambda_handler(event, context: dict = {}):
     """
     Update the icechunk store with the latest MUR-JPL-L4-GLOB-v4.1 data.
     """
-    # Reactivate this! For now testing with manual injection
+    # TODO: Reactivate this! 
For now testing with manual injection # Fetch secrets # secrets = get_secret() # os.environ['EARTHDATA_USERNAME'] = secrets['EARTHDATA_USERNAME'] diff --git a/cdk/lambda/testing.ipynb b/cdk/lambda/testing.ipynb index 7b332e4..df26707 100644 --- a/cdk/lambda/testing.ipynb +++ b/cdk/lambda/testing.ipynb @@ -34,273 +34,219 @@ "source": [ "# # os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C'\n", "# os.environ['SECRET_ARN'] = 'arn:aws:secretsmanager:us-east-2:444055461661:secret:edl-login-julius-ogbYjL'\n", - "# os.environ['DRY_RUN'] = \"true\"\n", + "os.environ['DRY_RUN'] = \"true\"\n", "# TODO: Need to figure out secrets management, for now Ill ingest the env variables manually (see also commented out code in `lambda_function.py`\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e85faa51-d917-4e91-a979-24bb12fe02c4", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 3, - "id": "b18705da-75e1-43e7-93fd-767d391ebbe7", - "metadata": {}, - "outputs": [], - "source": [ - "# #Testing the store bucket with EDL auth (does not work)\n", - "\n", - "# from lambda_function import get_icechunk_creds\n", - "# test_creds = get_icechunk_creds()\n", - "# import fsspec\n", - "# fs = fsspec.filesystem('s3', key=test_creds.access_key_id, secret=test_creds.secret_access_key, token=test_creds.session_token)\n", - "# fs.ls('nasa-eodc-public')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8bb1a30d-e279-4692-ad8e-d25c696d9eed", + "id": "ff920b67-793e-4fe3-ae82-6ce038af0e87", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Received event: {}\n", + "opening icechunk repo\n", + "Getting last timestep\n", + "Searching for granules\n", + "Number of granules found: 39\n", + "opening virtual dataset\n", + "New Data (Virtual): Size: 227GB\n", + "Dimensions: (time: 39, lat: 17999, lon: 36000)\n", + "Coordinates:\n", + " time (time) int32 156B ManifestArray}}\n" + "Tests passed. Merging new data into main branch.\n", + "Dry run, not merging add_time_2025-07-11T01:19:21.826321+00:00 into main\n" ] }, { - "ename": "IcechunkError", - "evalue": " x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. 
Check your key and signing method.\", s3_extended_request_id:\n \"WxhFQ0mjtcoqG/Dhr9/fAEtoRyzxOsqUtwxW7i2LmOM0y/07aWgq57Mae+eLOtV6SrWfMYAe0V8=\", aws_request_id: \"HZQ5GGGE7S2ABXYQ\" }\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIcechunkError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mlambda_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:159\u001b[0m, in \u001b[0;36mlambda_handler\u001b[0;34m(event, context)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;66;03m# Reactivate this! For now testing with manual injection\u001b[39;00m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;66;03m# Fetch secrets\u001b[39;00m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;66;03m# secrets = get_secret()\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;66;03m# os.environ['EARTHDATA_USERNAME'] = secrets['EARTHDATA_USERNAME']\u001b[39;00m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;66;03m# os.environ['EARTHDATA_PASSWORD'] = secrets['EARTHDATA_PASSWORD']\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReceived event: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjson\u001b[38;5;241m.\u001b[39mdumps(event)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mwrite_to_icechunk_or_fail\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatusCode\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m200\u001b[39m,\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbody\u001b[39m\u001b[38;5;124m'\u001b[39m: json\u001b[38;5;241m.\u001b[39mdumps(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSuccessfully processed messages: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 164\u001b[0m }\n", - "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:109\u001b[0m, in \u001b[0;36mwrite_to_icechunk_or_fail\u001b[0;34m()\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mopening icechunk repo\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 108\u001b[0m \u001b[38;5;66;03m# check date is next datetime for the icechunk store or fail\u001b[39;00m\n\u001b[0;32m--> 109\u001b[0m repo \u001b[38;5;241m=\u001b[39m \u001b[43mopen_icechunk_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mea_creds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m session \u001b[38;5;241m=\u001b[39m repo\u001b[38;5;241m.\u001b[39mreadonly_session(branch\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 111\u001b[0m \u001b[38;5;66;03m# MUR SST granules have a temporal range of date 1 21:00:00 to date 2 21:00:00, e.g. 
granule 20240627090000 has datetime range of 2024-06-26 21:00:00:00 to 2024-06-27 21:00:00:00\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;66;03m# so granules overlap in time. \u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# Here we increment the latest timestep of the icechunkstore by 1 minute to make sure we only get granules outside of the latest date covered by the icechunk store\u001b[39;00m\n", - "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:68\u001b[0m, in \u001b[0;36mopen_icechunk_repo\u001b[0;34m(bucket_name, store_name, ea_creds)\u001b[0m\n\u001b[1;32m 66\u001b[0m repo_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvirtual_chunk_credentials\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m earthdata_credentials\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrepo_config\u001b[39m\u001b[38;5;124m'\u001b[39m, repo_config)\n\u001b[0;32m---> 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43micechunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrepo_config\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/icechunk/repository.py:94\u001b[0m, in \u001b[0;36mRepository.open\u001b[0;34m(cls, storage, config, virtual_chunk_credentials)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m virtual_chunk_credentials: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, AnyCredential] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 68\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[1;32m 69\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m Open an existing Icechunk repository.\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124;03m An instance of the Repository class.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[0;32m---> 94\u001b[0m \u001b[43mPyRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 99\u001b[0m )\n", - "\u001b[0;31mIcechunkError\u001b[0m: x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: 
icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", s3_extended_request_id:\n \"WxhFQ0mjtcoqG/Dhr9/fAEtoRyzxOsqUtwxW7i2LmOM0y/07aWgq57Mae+eLOtV6SrWfMYAe0V8=\", aws_request_id: \"HZQ5GGGE7S2ABXYQ\" }\n"
-     ]
+     "data": {
+      "text/plain": [
+       "{'statusCode': 200,\n",
+       " 'body': '\"Successfully processed messages: 44B9XPA2C0QH6FVT5AF0\"'}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "lambda_handler({})"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5c44987c-d199-4003-91d7-d47561ae3e2a",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
-   "id": "7b0b213b-bfda-42e3-85ac-1b5250e89b6e",
+   "id": "29bb3baf",
    "metadata": {},
    "source": [
-    "# Testing the store"
+    "## Testing intermediate steps in the lambda function\n",
+    "Mostly a leftover from previous debugging, but it might still be helpful in the future."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "id": "fadb5413-8375-4a5c-9f8e-111dfd2b457f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "earthaccess.login()\n",
-    "ea_creds = earthaccess.get_s3_credentials(daac='PODAAC')\n",
-    "bucket = 'nasa-eodc-public'\n",
-    "store_name = \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\"\n",
-    "lat_slice = slice(47.6, 47.7)\n",
-    "lon_slice = slice(-122.4, -122.3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "e8f6b892-477d-43be-bf6c-9a720104ad37",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "time_range = [\"2025-06-30\", \"2025-07-02\"]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "311a5dee-3d74-49b2-89e7-a7462c3f534a",
-   "metadata": {},
-   "source": [
-    "### Get a value from the icechunk store"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "deb01661-2544-4451-a0c2-5b5977a4cfa3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import icechunk as ic\n",
-    "\n",
-    "\n",
-    "storage = ic.s3_storage(\n",
-    "    bucket=bucket,\n",
-    "    prefix=f\"icechunk/{store_name}\",\n",
-    "    anonymous=False,\n",
-    "    from_env=True # cannot auth with EDL\n",
-    ")\n",
-    "config = ic.RepositoryConfig.default()\n",
-    "config.set_virtual_chunk_container(ic.VirtualChunkContainer(\n",
-    "# ic.Repository.open(storage=storage)\n",
-    "\n",
-    "# config = icechunk.RepositoryConfig.default()\n",
-    "# config.set_virtual_chunk_container(icechunk.VirtualChunkContainer(\"s3\", \"s3://\", icechunk.s3_store(region=\"us-west-2\")))\n",
-    "\n",
-    "# repo_config = dict(\n",
-    "#     storage=storage,\n",
-    "#     config=config,\n",
-    "# )\n",
-    "\n",
-    "# if ea_creds:\n",
-    "#     earthdata_credentials = icechunk.containers_credentials(\n",
-    "#         s3=icechunk.s3_refreshable_credentials(get_credentials=get_icechunk_creds),\n",
-    "#     )\n",
-    "#     repo_config['virtual_chunk_credentials'] = earthdata_credentials\n",
-    "#     print('repo_config', repo_config)\n",
-    "#     return icechunk.Repository.open(**repo_config)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": 
"605fc2c3-378c-4986-84b3-6fe2020c00dd", + "execution_count": null, + "id": "5c44987c-d199-4003-91d7-d47561ae3e2a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "repo_config {'storage': S3Storage(bucket=nasa-eodc-public, prefix=icechunk/MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2, config=S3Options(region=None, endpoint_url=None, anonymous=false, allow_http=false, force_path_style=false)), 'config': RepositoryConfig(inline_chunk_threshold_bytes=None, get_partial_values_concurrency=None, compression=None, caching=None, storage=None, manifest=None), 'virtual_chunk_credentials': {'s3': }}\n" + "opening icechunk repo\n" ] }, - { - "ename": "IcechunkError", - "evalue": " x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. Check your key and signing method.\", aws_request_id:\n \"XS0RHNT4Y0BWAPF4\", s3_extended_request_id: \"dHG0oc/Ok3Gv5peEsPT97TQrQPwpmCn+t4mB56Ucxf+8O5uOm5VRjirQk0BP3ss4+yTCu2qwTnU=\" }\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIcechunkError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m repo \u001b[38;5;241m=\u001b[39m \u001b[43mopen_icechunk_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mea_creds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m session \u001b[38;5;241m=\u001b[39m repo\u001b[38;5;241m.\u001b[39mreadonly_session(branch\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m xds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_zarr(session\u001b[38;5;241m.\u001b[39mstore, zarr_version\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, chunks\u001b[38;5;241m=\u001b[39m{}, consolidated\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", - "File \u001b[0;32m~/mursst-icechunk-updater/cdk/lambda/lambda_function.py:68\u001b[0m, in \u001b[0;36mopen_icechunk_repo\u001b[0;34m(bucket_name, store_name, ea_creds)\u001b[0m\n\u001b[1;32m 66\u001b[0m repo_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvirtual_chunk_credentials\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m earthdata_credentials\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrepo_config\u001b[39m\u001b[38;5;124m'\u001b[39m, repo_config)\n\u001b[0;32m---> 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43micechunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrepo_config\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/icechunk/repository.py:94\u001b[0m, in \u001b[0;36mRepository.open\u001b[0;34m(cls, storage, config, virtual_chunk_credentials)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m virtual_chunk_credentials: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, AnyCredential] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 68\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[1;32m 69\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m Open an existing Icechunk repository.\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124;03m An instance of the Repository class.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[0;32m---> 94\u001b[0m \u001b[43mPyRepository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvirtual_chunk_credentials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 99\u001b[0m )\n", - "\u001b[0;31mIcechunkError\u001b[0m: x error getting object from object store service error\n | \n | context:\n | 0: icechunk::storage::s3::get_ref\n | with ref_key=\"branch.main/ref.json\"\n | at icechunk/src/storage/s3.rs:581\n | 1: icechunk::refs::fetch_branch\n | with name=\"main\"\n | at icechunk/src/refs.rs:385\n | 2: icechunk::refs::fetch_branch_tip\n | with name=\"main\"\n | at icechunk/src/refs.rs:404\n | 3: icechunk::repository::exists\n | at icechunk/src/repository.rs:316\n | 4: icechunk::repository::open\n | at icechunk/src/repository.rs:225\n | \n |-> error getting object from object store service error\n |-> service error\n |-> unhandled error (SignatureDoesNotMatch)\n `-> Error { code: \"SignatureDoesNotMatch\", message: \"The request signature we calculated does not match the signature you provided. 
Check your key and signing method.\", aws_request_id:\n \"XS0RHNT4Y0BWAPF4\", s3_extended_request_id: \"dHG0oc/Ok3Gv5peEsPT97TQrQPwpmCn+t4mB56Ucxf+8O5uOm5VRjirQk0BP3ss4+yTCu2qwTnU=\" }\n" - ] - } - ], - "source": [ - "repo = open_icechunk_repo(bucket, store_name, ea_creds)\n", - "session = repo.readonly_session(branch=\"main\")\n", - "xds = xr.open_zarr(session.store, zarr_version=3, chunks={}, consolidated=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "bfa9d9d6-3f19-4788-bfe2-b1b5efa620c7", - "metadata": {}, - "outputs": [ { "data": { "text/plain": [ - "slice('2025-06-30', '2025-07-02', None)" + "" ] }, - "execution_count": 7, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "slice(*time_range)" + "import icechunk as ic\n", + "from lambda_function import open_icechunk_repo\n", + "\n", + "repo = open_icechunk_repo('nasa-eodc-public', \"MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2\")\n", + "repo" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "a3850a68-eee5-4133-a9e1-6d4b14bba0ef", + "execution_count": 4, + "id": "3988f3a2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('2025-06-30 09:00:00', '2025-07-02 09:00:00')" + "{'add_time_2025-07-10T23:29:18.786913+00:00',\n", + " 'add_time_2025-07-10T23:36:45.945500+00:00',\n", + " 'add_time_2025-07-10T23:38:12.453998+00:00',\n", + " 'add_time_2025-07-10T23:40:37.782580+00:00',\n", + " 'add_time_2025-07-10T23:44:20.720212+00:00',\n", + " 'add_time_2025-07-10T23:47:36.497484+00:00',\n", + " 'add_time_2025-07-11T00:09:47.540006+00:00',\n", + " 'add_time_2025-07-11T00:11:30.066167+00:00',\n", + " 'add_time_2025-07-11T00:13:05.655129+00:00',\n", + " 'main'}" ] }, - "execution_count": 13, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "(time_range[0] + \" 09:00:00\", time_range[1] + \" 09:00:00\")" + "repo.list_branches()" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "3140febe-9c40-404b-93b8-fb1fa4c01501", + "execution_count": 3, + "id": "0be83918", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n", + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n", + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n", + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n", + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.\n", + " super().__init__(**codec_config)\n", + "/opt/conda/lib/python3.11/site-packages/numcodecs/zarr3.py:133: UserWarning: Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr 
implementations.\n", + " super().__init__(**codec_config)\n" + ] + }, { "data": { "text/html": [ @@ -681,29 +627,29 @@ " * lon (lon) float32 144kB -180.0 -180.0 -180.0 ... 180.0 180.0\n", " * time (time) datetime64[ns] 3kB 2024-06-02T09:00:00 ... 2025-...\n", "Data variables:\n", - " analysed_sst (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", " analysis_error (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", - " mask (time, lat, lon) float32 1TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", + " analysed_sst (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", " sea_ice_fraction (time, lat, lon) float64 2TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", + " mask (time, lat, lon) float32 1TB dask.array<chunksize=(1, 1023, 2047), meta=np.ndarray>\n", "Attributes: (12/47)\n", " Conventions: CF-1.7\n", - " title: Daily MUR SST, Interim near-real-time (nrt) p...\n", + " title: Daily MUR SST, Final product\n", " summary: A merged, multi-sensor L4 Foundation SST anal...\n", " references: http://podaac.jpl.nasa.gov/Multi-scale_Ultra-...\n", " institution: Jet Propulsion Laboratory\n", - " history: near real time (nrt) version created at nomin...\n", + " history: created at nominal 4-day latency; replaced nr...\n", " ... ...\n", " project: NASA Making Earth Science Data Records for Us...\n", " publisher_name: GHRSST Project Office\n", " publisher_url: http://www.ghrsst.org\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " processing_level: L4\n", - " cdm_data_type: grid