Skip to content

Commit ec6c6f5

Browse files
marcenacp authored and The TensorFlow Datasets Authors committed
Lazy load array_record.
This is a short-term solution to the installation problems on macOS since 4.9.0. This also makes sense in the long term, because array_record is not used by all users. The long-term solution to fix macOS installation is to build proper wheels for array_record on macOS/Windows platforms, but this will take more time.
PiperOrigin-RevId: 523373538
1 parent 2ca105a commit ec6c6f5

File tree

7 files changed

+26
-10
lines changed

7 files changed

+26
-10
lines changed

tensorflow_datasets/core/data_sources/array_record.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,9 @@
2828
from tensorflow_datasets.core import file_adapters
2929
from tensorflow_datasets.core import splits as splits_lib
3030
from tensorflow_datasets.core.utils import type_utils
31+
from tensorflow_datasets.core.utils.lazy_imports_utils import array_record_data_source
3132
import tree
3233

33-
from array_record.python import array_record_data_source
34-
3534
T = TypeVar('T')
3635

3736
_DEFAULT_ITERATION_STEP = 1000
@@ -55,9 +54,9 @@ class ArrayRecordDataSource(AbcSequence):
5554
None
5655
)
5756
iteration_step: int = _DEFAULT_ITERATION_STEP
58-
data_source: array_record_data_source.ArrayRecordDataSource = (
59-
dataclasses.field(init=False)
60-
)
57+
# In order to lazy load array_record, we don't load
58+
# `array_record_data_source.ArrayRecordDataSource` here.
59+
data_source: Any = dataclasses.field(init=False)
6160
length: int = dataclasses.field(init=False)
6261

6362
def __post_init__(self):

tensorflow_datasets/core/data_sources/array_record_test.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,7 @@
2525
from tensorflow_datasets.core import splits as splits_lib
2626
from tensorflow_datasets.core.data_sources import array_record
2727
from tensorflow_datasets.core.utils import shard_utils
28-
29-
from array_record.python import array_record_data_source
28+
from tensorflow_datasets.core.utils.lazy_imports_utils import array_record_data_source
3029

3130

3231
_FILE_INSTRUCTIONS = [

tensorflow_datasets/core/dataset_builder.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,6 @@
5959
if typing.TYPE_CHECKING:
6060
import apache_beam as beam
6161
from apache_beam.runners import runner
62-
from array_record.python import array_record_data_source
6362

6463
ListOrTreeOrElem = type_utils.ListOrTreeOrElem
6564
Tree = type_utils.Tree

tensorflow_datasets/core/file_adapters.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,9 @@
2424

2525
from etils import epath
2626
from tensorflow_datasets.core.utils import type_utils
27+
from tensorflow_datasets.core.utils.lazy_imports_utils import array_record_module
2728
from tensorflow_datasets.core.utils.lazy_imports_utils import tensorflow as tf
2829

29-
from array_record.python import array_record_module
30-
3130
ExamplePositions = List[Any]
3231

3332

tensorflow_datasets/core/folder_dataset/compute_split_utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from tensorflow_datasets.core import splits as split_lib
3131
from tensorflow_datasets.core import utils
3232
from tensorflow_datasets.core.proto import dataset_info_pb2
33+
from tensorflow_datasets.core.utils.lazy_imports_utils import array_record_module
3334

3435
from google.protobuf import json_format
3536

tensorflow_datasets/core/utils/lazy_imports_utils.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,9 +185,26 @@ def tf_success_callback(**kwargs):
185185
ensure_tf_version(kwargs["module"])
186186

187187

188+
def array_record_error_callback(**kwargs):
189+
del kwargs
190+
print("\n\n***************************************************************")
191+
print(
192+
"Failed to import ArrayRecord. This probably means that you are running"
193+
" on macOS or Windows. ArrayRecord currently does not work for your"
194+
" infrastructure, because it uses Python bindings in C++. We are actively"
195+
" working on this issue. Thanks for your understanding."
196+
)
197+
print("***************************************************************\n\n")
198+
199+
188200
with lazy_imports(
189201
error_callback=tf_error_callback, success_callback=tf_success_callback
190202
):
191203
import tensorflow as tf # pylint: disable=g-import-not-at-top,unused-import
192204

205+
206+
with lazy_imports(error_callback=array_record_error_callback):
207+
from array_record.python import array_record_data_source # pylint: disable=g-import-not-at-top,unused-import
208+
from array_record.python import array_record_module # pylint: disable=g-import-not-at-top,unused-import
209+
193210
tensorflow = tf

tensorflow_datasets/import_without_tf_test.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ def _generate_examples(self):
6060
def test_import_tfds_without_loading_tf():
6161
with mock.patch.object(logging, 'log_first_n') as log_first_n:
6262
assert 'tensorflow' not in sys.modules
63+
assert 'array_record' not in sys.modules
6364

6465
data_dir = '/tmp/import_without_tf'
6566
builder = DummyDataset(data_dir=data_dir)
@@ -77,3 +78,4 @@ def test_import_tfds_without_loading_tf():
7778
# No warning concerning TensorFlow DTypes was dispatched while loading
7879
assert not log_first_n.called
7980
assert 'tensorflow' not in sys.modules
81+
assert 'array_record' in sys.modules

0 commit comments

Comments
 (0)