From 7a9a2c4b743bb2d80f67294157ad89917bffcd96 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 21 Sep 2018 23:23:05 -0700 Subject: [PATCH 1/5] Add failing regression test for opaque whiteout handling in flattener --- BUILD.bazel | 11 ++++ client_v2_2_unit_tests.py | 118 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 client_v2_2_unit_tests.py diff --git a/BUILD.bazel b/BUILD.bazel index fe131fb95..feb0df4e3 100755 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -120,3 +120,14 @@ sh_test( ":pusher.par", ], ) + +py_test( + name = "client_v2_2_unit_tests", + size = "large", + srcs = [ + "client_v2_2_unit_tests.py", + ":containerregistry", + ], + main = "client_v2_2_unit_tests.py", +) + diff --git a/client_v2_2_unit_tests.py b/client_v2_2_unit_tests.py new file mode 100644 index 000000000..92dec2e28 --- /dev/null +++ b/client_v2_2_unit_tests.py @@ -0,0 +1,118 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import io +import tarfile +import unittest + +from containerregistry.client.v2_2 import docker_image as v2_2_image + + +class MockImage(object): + def __init__(self): + self._fs_layers = OrderedDict() + + def add_layer(self, filenames): + buf = io.BytesIO() + with tarfile.open(mode='w:', fileobj=buf) as tf: + for name in filenames: + tarinfo = tarfile.TarInfo(name) + if name.endswith("/"): + tarinfo.type = tarfile.DIRTYPE + tf.addfile(tarinfo) + buf.seek(0) + new_layer_id = str(len(self._fs_layers)) + self._fs_layers[new_layer_id] = buf.getvalue() + + def diff_ids(self): + return reversed(self._fs_layers.keys()) + + def uncompressed_layer(self, layer_id): + return self._fs_layers[layer_id] + + +class TestExtract(unittest.TestCase): + + def _test_flatten(self, layer_filenames, expected_output_filenames): + img = MockImage() + for filenames in layer_filenames: + img.add_layer(filenames) + buf = io.BytesIO() + with tarfile.open(mode='w:', fileobj=buf) as tar: + v2_2_image.extract(img, tar) + buf.seek(0) + output_filenames = [] + with tarfile.open(mode='r', fileobj=buf) as tar: + for tarinfo in tar: + if tarinfo.isdir(): + output_filenames.append(tarinfo.name + "/") + else: + output_filenames.append(tarinfo.name) + self.assertEqual(output_filenames, expected_output_filenames) + + def test_single_layer(self): + self._test_flatten( + [["/directory/", "/file"]], + ["/directory/", "/file"] + ) + + def test_purely_additive_layers(self): + self._test_flatten( + [ + ["dir/", "dir/file1", "file"], + ["dir/file2", "file2"] + ], + ["dir/file2", "file2", "dir/", "dir/file1", "file"] + ) + + def test_single_file_whiteout(self): + self._test_flatten( + [ + ["/foo"], + ["/.wh.foo"] + ], + [] + ) + + def test_parent_directory_whiteout(self): + self._test_flatten( + [ + ["/x/a/", "/x/b/", "/x/b/1"], + ["/x/.wh.b"] + ], + ["/x/a/"] + ) + + def test_opaque_whiteout(self): + # Examples from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/.wh..wh..opq", "a/b/", "a/b/c/", "a/b/c/foo"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + self._test_flatten( + [ + ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo", "a/.wh..wh..opq"], + ], + ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 496d266286c0abffd1414471ef268b74de74d7da Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 21 Sep 2018 23:38:32 -0700 Subject: [PATCH 2/5] Fix opqaue whiteout handling in flattener. --- client/v2_2/docker_image_.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 53e2488b5..7ccc44d07 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -798,18 +798,19 @@ def __exit__(self, unused_type, unused_value, unused_traceback): pass -def _in_whiteout_dir(fs, name): +def _in_whiteout_dir(fs, opaque_whiteouts, name): while name: dirname = os.path.dirname(name) if name == dirname: break - if fs.get(dirname): + if fs.get(dirname) or dirname in opaque_whiteouts: return True name = dirname return False _WHITEOUT_PREFIX = '.wh.' +_OPAQUE_WHITEOUT_FILENAME = '.wh..wh..opq' def extract(image, tar): @@ -823,17 +824,24 @@ def extract(image, tar): # to whether they are a tombstone or not. fs = {} + opaque_whiteouts_in_higher_layers = set() + # Walk the layers, topmost first and add files. If we've seen them in a # higher layer then we skip them for layer in image.diff_ids(): buf = io.BytesIO(image.uncompressed_layer(layer)) with tarfile.open(mode='r:', fileobj=buf) as layer_tar: + opaque_whiteouts_in_this_layer = [] for tarinfo in layer_tar: # If we see a whiteout file, then don't add anything to the tarball # but ensure that any lower layers don't add a file with the whited # out name. basename = os.path.basename(tarinfo.name) dirname = os.path.dirname(tarinfo.name) + + if basename == _OPAQUE_WHITEOUT_FILENAME: + opaque_whiteouts_in_this_layer.append(dirname) + tombstone = basename.startswith(_WHITEOUT_PREFIX) if tombstone: basename = basename[len(_WHITEOUT_PREFIX):] @@ -845,7 +853,7 @@ def extract(image, tar): continue # Check for a whited out parent directory - if _in_whiteout_dir(fs, name): + if _in_whiteout_dir(fs, opaque_whiteouts_in_higher_layers, name): continue # Mark this file as handled by adding its name. @@ -857,3 +865,4 @@ def extract(image, tar): tar.addfile(tarinfo, fileobj=layer_tar.extractfile(tarinfo)) else: tar.addfile(tarinfo, fileobj=None) + opaque_whiteouts_in_higher_layers.update(opaque_whiteouts_in_this_layer) From 32043b77aeac0734bfca8d48d8c5071fe7ef576e Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 24 Sep 2018 13:17:45 -0700 Subject: [PATCH 3/5] Add test for parent directory preservation. --- client_v2_2_unit_tests.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/client_v2_2_unit_tests.py b/client_v2_2_unit_tests.py index 92dec2e28..f6a07ed5a 100644 --- a/client_v2_2_unit_tests.py +++ b/client_v2_2_unit_tests.py @@ -96,7 +96,7 @@ def test_parent_directory_whiteout(self): ) def test_opaque_whiteout(self): - # Examples from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts self._test_flatten( [ ["a/", "a/b/", "a/b/c/", "a/b/c/bar"], @@ -113,6 +113,22 @@ def test_opaque_whiteout(self): ["a/", "a/b/", "a/b/c/", "a/b/c/foo"], ) + def test_opaque_whiteout_preserves_parent_directory(self): + # Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + self._test_flatten( + [ + [ + "bin/", + "bin/my-app-binary", + "bin/my-app-tools", + "bin/tools/", + "bin/tools/my-app-tool-one" + ], + ["bin/.wh..wh..opq"], + ], + ["bin/"], + ) + if __name__ == "__main__": unittest.main(verbosity=2) From 6a8dc5519f96d1f49e7d1a3cd1b5254f852a69bd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 24 Sep 2018 13:43:13 -0700 Subject: [PATCH 4/5] Add basic test for file contents. --- client_v2_2_unit_tests.py | 47 +++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/client_v2_2_unit_tests.py b/client_v2_2_unit_tests.py index f6a07ed5a..55792c41d 100644 --- a/client_v2_2_unit_tests.py +++ b/client_v2_2_unit_tests.py @@ -14,6 +14,7 @@ from collections import OrderedDict import io +from StringIO import StringIO import tarfile import unittest @@ -21,17 +22,31 @@ class MockImage(object): + """Mock of DockerImage, implementing only the methods called by extract().""" + def __init__(self): self._fs_layers = OrderedDict() def add_layer(self, filenames): + """Add a layer to the image. + + Args: + filenames: a list of filenames or (filename, content) pairs. Filenames + with trailing slashes become directory entries in the generated tar + """ buf = io.BytesIO() with tarfile.open(mode='w:', fileobj=buf) as tf: - for name in filenames: + for entry in filenames: + if (isinstance(entry, basestring)): + name = entry + content = "" + else: + (name, content) = entry tarinfo = tarfile.TarInfo(name) + tarinfo.size = len(content) if name.endswith("/"): tarinfo.type = tarfile.DIRTYPE - tf.addfile(tarinfo) + tf.addfile(tarinfo, fileobj=(StringIO(content) if content else None)) buf.seek(0) new_layer_id = str(len(self._fs_layers)) self._fs_layers[new_layer_id] = buf.getvalue() @@ -45,22 +60,31 @@ def uncompressed_layer(self, layer_id): class TestExtract(unittest.TestCase): - def _test_flatten(self, layer_filenames, expected_output_filenames): + def _test_flatten(self, layer_filenames, expected_flattened_output): + # Construct a mock DockerImage with the specified layers: img = MockImage() for filenames in layer_filenames: img.add_layer(filenames) buf = io.BytesIO() + + # Run the actual extract logic: with tarfile.open(mode='w:', fileobj=buf) as tar: v2_2_image.extract(img, tar) + + # Compare the extract() output to the expected results: buf.seek(0) - output_filenames = [] + flattened_output = [] with tarfile.open(mode='r', fileobj=buf) as tar: for tarinfo in tar: if tarinfo.isdir(): - output_filenames.append(tarinfo.name + "/") + flattened_output.append(tarinfo.name + "/") else: - output_filenames.append(tarinfo.name) - self.assertEqual(output_filenames, expected_output_filenames) + contents = tar.extractfile(tarinfo).read() + if contents: + flattened_output.append((tarinfo.name, contents)) + else: + flattened_output.append(tarinfo.name) + self.assertEqual(flattened_output, expected_flattened_output) def test_single_layer(self): self._test_flatten( @@ -77,6 +101,15 @@ def test_purely_additive_layers(self): ["dir/file2", "file2", "dir/", "dir/file1", "file"] ) + def test_highest_layer_of_file_takes_precedence(self): + self._test_flatten( + [ + [("file", "a")], + [("file", "b")] + ], + [("file", "b")] + ) + def test_single_file_whiteout(self): self._test_flatten( [ From 1b32171b0957ca2ae388f85865bebfe65e37bf86 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 24 Sep 2018 13:46:36 -0700 Subject: [PATCH 5/5] Add a comment about the opaque whiteout --- client/v2_2/docker_image_.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 7ccc44d07..66e69375b 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -839,6 +839,9 @@ def extract(image, tar): basename = os.path.basename(tarinfo.name) dirname = os.path.dirname(tarinfo.name) + # If we see an opaque whiteout file, then don't add anything to the + # tarball but ensure that any lower layers don't add files or + # directories which are siblings of the whiteout file. if basename == _OPAQUE_WHITEOUT_FILENAME: opaque_whiteouts_in_this_layer.append(dirname)