From 0d649a7ed022ebd66e7c3bc5738f938d6cfb8c43 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Tue, 2 Sep 2025 15:53:40 -0700 Subject: [PATCH 1/8] latest submodules --- crt/aws-c-io | 2 +- crt/aws-lc | 2 +- crt/s2n | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crt/aws-c-io b/crt/aws-c-io index 9c7f98dcb..ba5cb6de2 160000 --- a/crt/aws-c-io +++ b/crt/aws-c-io @@ -1 +1 @@ -Subproject commit 9c7f98dcb083bd705eeb323e77868b1e2c9d4e73 +Subproject commit ba5cb6de2cf12bfb154f302f6dedddf81c0ce4cd diff --git a/crt/aws-lc b/crt/aws-lc index 04875dbbd..98500e8bc 160000 --- a/crt/aws-lc +++ b/crt/aws-lc @@ -1 +1 @@ -Subproject commit 04875dbbd6610a91855dcdc8edc268da289cb6d9 +Subproject commit 98500e8bc7dc3e3e5871519abf12cdc781ebe4e1 diff --git a/crt/s2n b/crt/s2n index 418313c27..a7862238a 160000 --- a/crt/s2n +++ b/crt/s2n @@ -1 +1 @@ -Subproject commit 418313c274d9cb72984dcd6e5e917740bc180664 +Subproject commit a7862238a9d6703e0a3e2d7ea4341de8d57429bd From fe3cbc8ba9d41c4d41ac3d8e604d9e0aeac19470 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Tue, 2 Sep 2025 15:54:09 -0700 Subject: [PATCH 2/8] all latest submodules --- crt/aws-c-common | 2 +- crt/aws-c-s3 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crt/aws-c-common b/crt/aws-c-common index 2b67a658e..29af1e537 160000 --- a/crt/aws-c-common +++ b/crt/aws-c-common @@ -1 +1 @@ -Subproject commit 2b67a658e461520f1de20d64342b91ddcedc7ebb +Subproject commit 29af1e5373b75bc26c33814e553bed7481ecffcc diff --git a/crt/aws-c-s3 b/crt/aws-c-s3 index 3afa5d08b..9cbb9516a 160000 --- a/crt/aws-c-s3 +++ b/crt/aws-c-s3 @@ -1 +1 @@ -Subproject commit 3afa5d08be95e82199a153e3abbe59bbb42638d7 +Subproject commit 9cbb9516a9818dc50a65eb1365499cd1cd18302a From 322c67ceecc555f9364cded016a46311f4668b44 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 10:24:50 -0700 Subject: [PATCH 3/8] fio options binding --- awscrt/s3.py | 81 +++++++++++++++++++++++++++++++++++++++- source/s3_client.c | 16 +++++++- source/s3_meta_request.c | 14 +++++++ 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/awscrt/s3.py b/awscrt/s3.py index 7061e699a..475decebb 100644 --- a/awscrt/s3.py +++ b/awscrt/s3.py @@ -142,6 +142,43 @@ class S3ChecksumConfig: """Whether to retrieve and validate response checksums.""" +@dataclass +class S3FileIoOptions: + """ + Controls how client performance file I/O operations. + Only applies to the file based workload. + Notes: only applies when `send_filepath` is set from the request. + """ + + should_stream: bool = False + """ + Skip buffering the part in memory before sending the request. + If set, set the `disk_throughput` to be reasonable align with the available disk throughput. + Otherwise, the transfer may fail with connection starvation. + """ + + disk_throughput_gbps: (Optional[float]) = 0.0 + """ + The estimated disk throughput. Only be applied when `streaming_upload` is true. + in gigabits per second (Gbps). + When doing upload with streaming, it's important to set the disk throughput to prevent the connection starvation. + + Notes: There are possibilities that cannot reach the all available disk throughput: + 1. Disk is busy with other applications + 2. OS Cache may cap the throughput, use `direct_io` to get around this. + """ + + direct_io: bool = False + """ + Enable direct IO to bypass the OS cache. Helpful when the disk I/O outperforms the kernel cache. + Notes: + - Only supported on linux for now. + - Only supports upload for now. + - Check NOTES for O_DIRECT for additional info https://man7.org/linux/man-pages/man2/openat.2.html + In summary, O_DIRECT is a potentially powerful tool that should be used with caution. + """ + + class S3Client(NativeResource): """S3 client @@ -217,6 +254,11 @@ class S3Client(NativeResource): This option is only supported on Linux, MacOS, and platforms that have either SO_BINDTODEVICE or IP_BOUND_IF. It is not supported on Windows. `AWS_ERROR_PLATFORM_NOT_SUPPORTED` will be raised on unsupported platforms. On Linux, SO_BINDTODEVICE is used and requires kernel version >= 5.7 or root privileges. + + fio_options: (Optional[S3FileIoOptions]) + If set, this controls how the client interact with file I/O. + If not set, a default options will be created to avoid memory issue based on the size of file. + Note: Only applies when the request created with `send_filepath` """ __slots__ = ('shutdown_event', '_region') @@ -235,7 +277,8 @@ def __init__( throughput_target_gbps=None, enable_s3express=False, memory_limit=None, - network_interface_names: Optional[Sequence[str]] = None): + network_interface_names: Optional[Sequence[str]] = None, + fio_options: Optional['S3FileIoOptions'] = None): assert isinstance(bootstrap, ClientBootstrap) or bootstrap is None assert isinstance(region, str) assert isinstance(signing_config, AwsSigningConfig) or signing_config is None @@ -249,6 +292,7 @@ def __init__( float) or throughput_target_gbps is None assert isinstance(enable_s3express, bool) or enable_s3express is None assert isinstance(network_interface_names, Sequence) or network_interface_names is None + assert isinstance(fio_options, S3FileIoOptions) or fio_options is None if credential_provider and signing_config: raise ValueError("'credential_provider' has been deprecated in favor of 'signing_config'. " @@ -288,6 +332,15 @@ def on_shutdown(): # ensure this is a list, so it's simpler to process in C if not isinstance(network_interface_names, list): network_interface_names = list(network_interface_names) + fio_options_set = False + should_stream = False + disk_throughput_gbps = 0.0 + direct_io = False + if fio_options is not None: + fio_options_set = True + should_stream = fio_options.should_stream + disk_throughput_gbps = fio_options.disk_throughput_gbps + direct_io = fio_options.direct_io self._binding = _awscrt.s3_client_new( bootstrap, @@ -303,6 +356,10 @@ def on_shutdown(): enable_s3express, memory_limit, network_interface_names, + fio_options_set, + should_stream, + disk_throughput_gbps, + direct_io, s3_client_core) def make_request( @@ -318,6 +375,7 @@ def make_request( checksum_config=None, part_size=None, multipart_upload_threshold=None, + fio_options=None, on_headers=None, on_body=None, on_done=None, @@ -398,6 +456,11 @@ def make_request( If both `part_size` and `multipart_upload_threshold` are not set, the values from `aws_s3_client_config` are used. + fio_options: (Optional[S3FileIoOptions]) + If set, this overrides the client fio_options to control how this request interact with file I/O. + If not set, a default options will be created to avoid memory issue based on the size of file. + Note: Only applies when the request created with `send_filepath` + on_headers: Optional callback invoked as the response received, and even the API request has been split into multiple parts, this callback will only be invoked once as it's just making one API request to S3. @@ -483,6 +546,7 @@ def make_request( checksum_config=checksum_config, part_size=part_size, multipart_upload_threshold=multipart_upload_threshold, + fio_options=fio_options, on_headers=on_headers, on_body=on_body, on_done=on_done, @@ -520,6 +584,7 @@ def __init__( checksum_config=None, part_size=None, multipart_upload_threshold=None, + fio_options=None, on_headers=None, on_body=None, on_done=None, @@ -532,6 +597,7 @@ def __init__( assert callable(on_done) or on_done is None assert isinstance(part_size, int) or part_size is None assert isinstance(multipart_upload_threshold, int) or multipart_upload_threshold is None + assert isinstance(fio_options, S3FileIoOptions) or fio_options is None if type == S3RequestType.DEFAULT and not operation_name: raise ValueError("'operation_name' must be set when using S3RequestType.DEFAULT") @@ -555,6 +621,15 @@ def __init__( if checksum_config.location is not None: checksum_location = checksum_config.location.value validate_response_checksum = checksum_config.validate_response + fio_options_set = False + should_stream = False + disk_throughput_gbps = 0.0 + direct_io = False + if fio_options is not None: + fio_options_set = True + should_stream = fio_options.should_stream + disk_throughput_gbps = fio_options.disk_throughput_gbps + direct_io = fio_options.direct_io s3_request_core = _S3RequestCore( request, @@ -583,6 +658,10 @@ def __init__( validate_response_checksum, part_size, multipart_upload_threshold, + fio_options_set, + should_stream, + disk_throughput_gbps, + direct_io, s3_request_core) @property diff --git a/source/s3_client.c b/source/s3_client.c index 8eafd03b5..613da1f23 100644 --- a/source/s3_client.c +++ b/source/s3_client.c @@ -258,11 +258,15 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { int enable_s3express; /* p */ uint64_t mem_limit; /* K */ PyObject *network_interface_names_py; /* O */ + int fio_options_set; /* p - boolean predicate */ + int should_stream; /* p - boolean predicate */ + double disk_throughput_gbps; /* d */ + int direct_io; /* p - boolean predicate */ PyObject *py_core; /* O */ if (!PyArg_ParseTuple( args, - "OOOOOs#iKKdpKOO", + "OOOOOs#iKKdpKOppdpO", &bootstrap_py, &signing_config_py, &credential_provider_py, @@ -277,6 +281,10 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { &enable_s3express, &mem_limit, &network_interface_names_py, + &fio_options_set, + &should_stream, + &disk_throughput_gbps, + &direct_io, &py_core)) { return NULL; } @@ -366,6 +374,11 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { } } } + struct aws_s3_file_io_options fio_opts = { + .should_stream = should_stream, + .disk_throughput_gbps = disk_throughput_gbps, + .direct_io = direct_io, + }; struct aws_s3_client_config s3_config = { .region = region, @@ -382,6 +395,7 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { .enable_s3express = enable_s3express, .network_interface_names_array = network_interface_names, .num_network_interface_names = num_network_interface_names, + .fio_opts = fio_options_set ? &fio_opts : NULL, }; s3_client->native = aws_s3_client_new(allocator, &s3_config); diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 56ac5654f..081b5e598 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -332,6 +332,10 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { int validate_response_checksum; /* p - boolean predicate */ uint64_t part_size; /* K */ uint64_t multipart_upload_threshold; /* K */ + int fio_options_set; /* p - boolean predicate */ + int should_stream; /* p - boolean predicate */ + double disk_throughput_gbps; /* d */ + int direct_io; /* p - boolean predicate */ PyObject *py_core; /* O */ if (!PyArg_ParseTuple( args, @@ -352,6 +356,10 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { &validate_response_checksum, &part_size, &multipart_upload_threshold, + &fio_options_set, + &should_stream, + &disk_throughput_gbps, + &direct_io, &py_core)) { return NULL; } @@ -407,6 +415,11 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { aws_mem_release(allocator, meta_request); return NULL; } + struct aws_s3_file_io_options fio_opts = { + .should_stream = should_stream, + .disk_throughput_gbps = disk_throughput_gbps, + .direct_io = direct_io, + }; meta_request->py_core = py_core; Py_INCREF(meta_request->py_core); @@ -426,6 +439,7 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { .progress_callback = s_s3_request_on_progress, .part_size = part_size, .multipart_upload_threshold = multipart_upload_threshold, + .fio_opts = fio_options_set ? &fio_opts : NULL, .user_data = meta_request, }; From 01957824ee41575b378a78e2fdb0e87a5a670f63 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 10:34:22 -0700 Subject: [PATCH 4/8] make sure it passed down correctly --- test/test_s3.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/test_s3.py b/test/test_s3.py index 4e8785eed..98bc09538 100644 --- a/test/test_s3.py +++ b/test/test_s3.py @@ -25,6 +25,7 @@ S3RequestType, S3ResponseError, CrossProcessLock, + S3FileIoOptions, create_default_s3_signing_config, get_optimized_platforms, ) @@ -397,12 +398,14 @@ def _test_s3_put_get_object( enable_s3express=False, mem_limit=None, **kwargs): + fio_options = S3FileIoOptions(should_stream=True, disk_throughput_gbps=10.0, direct_io=True) s3_client = s3_client_new( False, self.region, self.part_size, enable_s3express=enable_s3express, - mem_limit=mem_limit) + mem_limit=mem_limit, + fio_options=fio_options) signing_config = None if enable_s3express: signing_config = AwsSigningConfig( @@ -590,6 +593,16 @@ def test_put_object_filepath(self): request = self._put_object_request(None, content_length) self._test_s3_put_get_object(request, S3RequestType.PUT_OBJECT, send_filepath=self.temp_put_obj_file_path) + def test_put_object_filepath_with_fio_options(self): + content_length = os.stat(self.temp_put_obj_file_path).st_size + request = self._put_object_request(None, content_length) + fio_options = S3FileIoOptions(should_stream=True, disk_throughput_gbps=10.0, direct_io=True) + self._test_s3_put_get_object( + request, + S3RequestType.PUT_OBJECT, + send_filepath=self.temp_put_obj_file_path, + fio_options=fio_options) + def test_put_object_filepath_unknown_content_length(self): content_length = os.stat(self.temp_put_obj_file_path).st_size request = self._put_object_request(None, content_length, unknown_content_length=True) From ef74f3173a03d509d2a47ba84c7d7983035d78d4 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 10:36:44 -0700 Subject: [PATCH 5/8] update --- crt/aws-c-s3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crt/aws-c-s3 b/crt/aws-c-s3 index 9cbb9516a..6235fd562 160000 --- a/crt/aws-c-s3 +++ b/crt/aws-c-s3 @@ -1 +1 @@ -Subproject commit 9cbb9516a9818dc50a65eb1365499cd1cd18302a +Subproject commit 6235fd5621f351d584c6ce4302a358eaa6329b4e From aa0a546544337a6332e5e52c3f97043aa3115eb1 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 10:48:48 -0700 Subject: [PATCH 6/8] add the magic string --- source/s3_meta_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 081b5e598..466b3cea7 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -339,7 +339,7 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { PyObject *py_core; /* O */ if (!PyArg_ParseTuple( args, - "OOOizOOzzs#iipKKO", + "OOOizOOzzs#iipKKppdpO", &py_s3_request, &s3_client_py, &http_request_py, From 28f73fa56ba8b30515e679dcc2b2e2bb653a370b Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 10:52:51 -0700 Subject: [PATCH 7/8] well, looks like we used a hack in the test --- test/test_s3.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_s3.py b/test/test_s3.py index 98bc09538..e8390526a 100644 --- a/test/test_s3.py +++ b/test/test_s3.py @@ -398,14 +398,12 @@ def _test_s3_put_get_object( enable_s3express=False, mem_limit=None, **kwargs): - fio_options = S3FileIoOptions(should_stream=True, disk_throughput_gbps=10.0, direct_io=True) s3_client = s3_client_new( False, self.region, self.part_size, enable_s3express=enable_s3express, - mem_limit=mem_limit, - fio_options=fio_options) + mem_limit=mem_limit) signing_config = None if enable_s3express: signing_config = AwsSigningConfig( From de2f6bb6aae62dc7449ad748c917b654a3dff3d3 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 3 Sep 2025 14:29:35 -0700 Subject: [PATCH 8/8] include `jitterentropy` from aws-lc --- MANIFEST.in | 1 + source/s3_client.c | 1 + source/s3_meta_request.c | 1 + 3 files changed, 3 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 2a8f6f649..829c4f8b8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -44,6 +44,7 @@ graft crt/aws-lc/tests/compiler_features_tests prune crt/aws-lc/third_party graft crt/aws-lc/third_party/fiat graft crt/aws-lc/third_party/s2n-bignum +graft crt/aws-lc/third_party/jitterentropy prune crt/aws-lc/tool prune crt/aws-lc/util include crt/aws-lc/util/fipstools/CMakeLists.txt diff --git a/source/s3_client.c b/source/s3_client.c index 613da1f23..687d64d34 100644 --- a/source/s3_client.c +++ b/source/s3_client.c @@ -395,6 +395,7 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { .enable_s3express = enable_s3express, .network_interface_names_array = network_interface_names, .num_network_interface_names = num_network_interface_names, + /* If fio options not set, let native code to decide the default instead */ .fio_opts = fio_options_set ? &fio_opts : NULL, }; diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 466b3cea7..1c228e3a8 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -439,6 +439,7 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { .progress_callback = s_s3_request_on_progress, .part_size = part_size, .multipart_upload_threshold = multipart_upload_threshold, + /* If fio options not set, let native code to decide the default instead */ .fio_opts = fio_options_set ? &fio_opts : NULL, .user_data = meta_request, };