From 490fa0736cba440e9d544341b30c3b612baeb154 Mon Sep 17 00:00:00 2001 From: Alecto Irene Perez Date: Mon, 6 Jan 2025 17:05:00 -0500 Subject: [PATCH 1/8] Add ASSERT_CONTENTS_EQUAL test macro in testing.cmake Checks if the contents of a file matches the given input --- cmake/testing.cmake | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cmake/testing.cmake b/cmake/testing.cmake index bbc1bf27..a46a9f52 100644 --- a/cmake/testing.cmake +++ b/cmake/testing.cmake @@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file) message(FATAL_ERROR "assertion failed: file ${file} exists") endif() endfunction() + +function(ASSERT_CONTENTS_EQUAL file content) + if(EXISTS ${file}) + file(READ ${file} file_content) + if(content STREQUAL file_content) + message(STATUS "test passed: '${file}' exists and contains '${content}'") + else() + message(FATAL_ERROR "assertion failed: file '${file}' does not contain expected content.") + endif() + else() + message(FATAL_ERROR "assertion failed: file '${file} does not exist") + endif() +endfunction() From d3e245fe62c670f64bc3adadf8c5403fde647bef Mon Sep 17 00:00:00 2001 From: Alecto Irene Perez Date: Mon, 6 Jan 2025 17:07:12 -0500 Subject: [PATCH 2/8] Use shorter hashes with CPM_SOURCE_CACHE (#624) Uses shorter hashes with CPM_SOURCE_CACHE. Falls back to a longer hash if necessary (ie, if there's a collision with an existing hash). See: https://github.com/cpm-cmake/CPM.cmake/issues/624 --- cmake/CPM.cmake | 56 +++++++++++++++++++++ test/unit/get_shortest_hash.cmake | 84 +++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 test/unit/get_shortest_hash.cmake diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index 80e6896c..224637aa 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -198,6 +198,52 @@ function(cpm_package_name_from_git_uri URI RESULT) endif() endfunction() + +# Find the shortest hash that can be used +# eg, if origin_hash is cccb77ae9609d2768ed80dd42cec54f77b1f1455 +# the following files will be checked, until one is found that +# is either empty (allowing us to assign origin_hash), or whose contents matches +# ${origin_hash} +# +# - .../cccb.hash +# - .../cccb77ae.hash +# - .../cccb77ae9609.hash +# - .../cccb77ae9609d276.hash +# etc +# We will be able to use a shorter path with very high probability, but in the +# (rare) event that the first couple characters collide, we will check +# longer and longer substrings. +function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var) + foreach(len RANGE 4 40 4) + string(SUBSTRING "${origin_hash}" 0 ${len} short_hash) + set(hash_lock ${source_cache_dir}/${short_hash}.lock) + set(hash_fp ${source_cache_dir}/${short_hash}.hash) + # Take a lock, so we don't have a race condition with another instance + # of cmake. We will release this lock when we can, however, if there + # is an error, we want to ensure it gets released on it's own on exit + # from the function. + file(LOCK ${hash_lock} GUARD FUNCTION) + + # Load the contents of .../${short_hash}.hash + file(TOUCH ${hash_fp}) + file(READ ${hash_fp} hash_fp_contents) + + if(hash_fp_contents STREQUAL "") + # Write the origin hash + file(WRITE ${hash_fp} ${origin_hash}) + file(LOCK ${hash_lock} RELEASE) + break() + elseif(hash_fp_contents STREQUAL origin_hash) + file(LOCK ${hash_lock} RELEASE) + break() + else() + file(LOCK ${hash_lock} RELEASE) + endif() + endforeach() + set(${short_hash_output_var} "${short_hash}" PARENT_SCOPE) +endfunction() + + # Try to infer package name and version from a url function(cpm_package_name_and_ver_from_url url outName outVer) if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)") @@ -794,9 +840,19 @@ function(CPMAddPackage) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY}) elseif(CPM_USE_NAMED_CACHE_DIRECTORIES) string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG") + cpm_get_shortest_hash( + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash + ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME}) else() string(SHA1 origin_hash "${origin_parameters}") + cpm_get_shortest_hash( + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash + ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}) endif() # Expand `download_directory` relative path. This is important because EXISTS doesn't work for diff --git a/test/unit/get_shortest_hash.cmake b/test/unit/get_shortest_hash.cmake new file mode 100644 index 00000000..776a0d11 --- /dev/null +++ b/test/unit/get_shortest_hash.cmake @@ -0,0 +1,84 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +include(${CPM_PATH}/CPM.cmake) +include(${CPM_PATH}/testing.cmake) + +# Random suffix +string(RANDOM LENGTH 6 ALPHABET "0123456789abcdef" tmpdir_suffix) + +# Seconds since epoch +string(TIMESTAMP tmpdir_base "%s" UTC) + +set(tmp "get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}") + +if(IS_DIRECTORY ${tmp}) + message(FATAL_ERROR "Test directory ${tmp} already exists") +endif() + +file(MAKE_DIRECTORY "${tmp}") + +# 1. Sanity check: none of these directories should exist yet + +assert_not_exists(${tmp}/cccb.hash) +assert_not_exists(${tmp}/cccb77ae.hash) +assert_not_exists(${tmp}/cccb77ae9609.hash) +assert_not_exists(${tmp}/cccb77ae9608.hash) +assert_not_exists(${tmp}/cccb77be.hash) + +# 2. The directory is empty, so it should get a 4-character hash +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb") +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) + +# 3. Calling the function with a new hash that differs subtly should result +# in more characters being used, enough to uniquely identify the hash + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77ae") +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77ae9609") +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) + +cpm_get_shortest_hash(${tmp} "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb77ae9608") +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77be") +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# 4. The old file should still exist, and have the same content +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# 5. Confirm idempotence: calling any of these function should produce the same hash +# as before (hash lookups work correctly once the .hash files are created) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb") +assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77ae") +assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) + +cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77ae9609") +assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) + +cpm_get_shortest_hash(${tmp} "cccb77ae9608d2768ed80dd42cec54f77b1f1455" hash) +assert_equal(${hash} "cccb77ae9608") +assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) + +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) +assert_equal(${hash} "cccb77be") +assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) + +# 6. Cleanup - remove the temporary directory that we created + +file(REMOVE_RECURSE ${tmp}) From 7cc1b7b3353fa92882360590f05b2dfba68cc162 Mon Sep 17 00:00:00 2001 From: Alecto Irene Perez Date: Mon, 6 Jan 2025 17:31:49 -0500 Subject: [PATCH 3/8] Update integration tests to support shorter hashes --- test/integration/test_source_cache.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/integration/test_source_cache.rb b/test/integration/test_source_cache.rb index ce2eb1d7..1d075be7 100644 --- a/test/integration/test_source_cache.rb +++ b/test/integration/test_source_cache.rb @@ -75,6 +75,17 @@ def check_package_cache(name, ver, dir_sha1) assert_equal ver, package.ver expected_parent_dir = File.join(@cache_dir, name.downcase) assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}" - assert_equal dir_sha1, File.basename(package.src_dir) + + # The hash has been shortened by cpm_get_shortest_hash. The following + # should hold: + # - The short hash should be a prefix of the input hash + # - There should be a file ".../${short_hash}.hash" which matches the full hash + short_hash = File.basename(package.src_dir) + assert dir_sha1.start_with?(short_hash), "short_hash should be a prefix of dir_sha1" + + # Check that the full hash is stored in the .hash file + hash_file = "#{package.src_dir}.hash" + assert File.exist?(hash_file), "Hash file #{hash_file} should exist" + assert_equal dir_sha1, File.read(hash_file), "Hash file should contain the full original hash" end end From a5b073850cce4579d4d28b3eb61f8276cbad51c4 Mon Sep 17 00:00:00 2001 From: Lars Melchior Date: Sun, 18 May 2025 18:11:49 +0200 Subject: [PATCH 4/8] trigger ci From 85bd8965f4435c8620baef4b472e1b65fd2d772b Mon Sep 17 00:00:00 2001 From: Lars Melchior Date: Sun, 18 May 2025 18:16:37 +0200 Subject: [PATCH 5/8] run cmake-format --- cmake/CPM.cmake | 50 +++++++++++++++---------------- test/unit/get_shortest_hash.cmake | 20 ++++++++----- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index 4ca7837c..c00863c3 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -202,30 +202,26 @@ function(cpm_package_name_from_git_uri URI RESULT) endif() endfunction() - -# Find the shortest hash that can be used -# eg, if origin_hash is cccb77ae9609d2768ed80dd42cec54f77b1f1455 -# the following files will be checked, until one is found that -# is either empty (allowing us to assign origin_hash), or whose contents matches -# ${origin_hash} +# Find the shortest hash that can be used eg, if origin_hash is +# cccb77ae9609d2768ed80dd42cec54f77b1f1455 the following files will be checked, until one is found +# that is either empty (allowing us to assign origin_hash), or whose contents matches ${origin_hash} +# +# * .../cccb.hash +# * .../cccb77ae.hash +# * .../cccb77ae9609.hash +# * .../cccb77ae9609d276.hash +# * etc # -# - .../cccb.hash -# - .../cccb77ae.hash -# - .../cccb77ae9609.hash -# - .../cccb77ae9609d276.hash -# etc -# We will be able to use a shorter path with very high probability, but in the -# (rare) event that the first couple characters collide, we will check -# longer and longer substrings. +# We will be able to use a shorter path with very high probability, but in the (rare) event that the +# first couple characters collide, we will check longer and longer substrings. function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var) foreach(len RANGE 4 40 4) string(SUBSTRING "${origin_hash}" 0 ${len} short_hash) set(hash_lock ${source_cache_dir}/${short_hash}.lock) set(hash_fp ${source_cache_dir}/${short_hash}.hash) - # Take a lock, so we don't have a race condition with another instance - # of cmake. We will release this lock when we can, however, if there - # is an error, we want to ensure it gets released on it's own on exit - # from the function. + # Take a lock, so we don't have a race condition with another instance of cmake. We will release + # this lock when we can, however, if there is an error, we want to ensure it gets released on + # it's own on exit from the function. file(LOCK ${hash_lock} GUARD FUNCTION) # Load the contents of .../${short_hash}.hash @@ -244,10 +240,12 @@ function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_va file(LOCK ${hash_lock} RELEASE) endif() endforeach() - set(${short_hash_output_var} "${short_hash}" PARENT_SCOPE) + set(${short_hash_output_var} + "${short_hash}" + PARENT_SCOPE + ) endfunction() - # Try to infer package name and version from a url function(cpm_package_name_and_ver_from_url url outName outVer) if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)") @@ -845,17 +843,17 @@ function(CPMAddPackage) elseif(CPM_USE_NAMED_CACHE_DIRECTORIES) string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG") cpm_get_shortest_hash( - "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory - "${origin_hash}" # Input hash - origin_hash # Computed hash + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME}) else() string(SHA1 origin_hash "${origin_parameters}") cpm_get_shortest_hash( - "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory - "${origin_hash}" # Input hash - origin_hash # Computed hash + "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory + "${origin_hash}" # Input hash + origin_hash # Computed hash ) set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}) endif() diff --git a/test/unit/get_shortest_hash.cmake b/test/unit/get_shortest_hash.cmake index 776a0d11..bbe6a33a 100644 --- a/test/unit/get_shortest_hash.cmake +++ b/test/unit/get_shortest_hash.cmake @@ -4,7 +4,11 @@ include(${CPM_PATH}/CPM.cmake) include(${CPM_PATH}/testing.cmake) # Random suffix -string(RANDOM LENGTH 6 ALPHABET "0123456789abcdef" tmpdir_suffix) +string( + RANDOM + LENGTH 6 + ALPHABET "0123456789abcdef" tmpdir_suffix +) # Seconds since epoch string(TIMESTAMP tmpdir_base "%s" UTC) @@ -25,13 +29,13 @@ assert_not_exists(${tmp}/cccb77ae9609.hash) assert_not_exists(${tmp}/cccb77ae9608.hash) assert_not_exists(${tmp}/cccb77be.hash) -# 2. The directory is empty, so it should get a 4-character hash +# 1. The directory is empty, so it should get a 4-character hash cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) assert_equal(${hash} "cccb") assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) -# 3. Calling the function with a new hash that differs subtly should result -# in more characters being used, enough to uniquely identify the hash +# 1. Calling the function with a new hash that differs subtly should result in more characters being +# used, enough to uniquely identify the hash cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1456" hash) assert_equal(${hash} "cccb77ae") @@ -49,15 +53,15 @@ cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) assert_equal(${hash} "cccb77be") assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) -# 4. The old file should still exist, and have the same content +# 1. The old file should still exist, and have the same content assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456) assert_contents_equal(${tmp}/cccb77ae9609.hash cccb77ae9609d2768ed80dd42cec54f77b1f1457) assert_contents_equal(${tmp}/cccb77ae9608.hash cccb77ae9608d2768ed80dd42cec54f77b1f1455) assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) -# 5. Confirm idempotence: calling any of these function should produce the same hash -# as before (hash lookups work correctly once the .hash files are created) +# 1. Confirm idempotence: calling any of these function should produce the same hash as before (hash +# lookups work correctly once the .hash files are created) cpm_get_shortest_hash(${tmp} "cccb77ae9609d2768ed80dd42cec54f77b1f1455" hash) assert_equal(${hash} "cccb") @@ -79,6 +83,6 @@ cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) assert_equal(${hash} "cccb77be") assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) -# 6. Cleanup - remove the temporary directory that we created +# 1. Cleanup - remove the temporary directory that we created file(REMOVE_RECURSE ${tmp}) From ea3724a5782a3f98cf5cd6c21d9106f434e75394 Mon Sep 17 00:00:00 2001 From: Lars Melchior Date: Sun, 18 May 2025 18:30:07 +0200 Subject: [PATCH 6/8] if already available, use the legacy cache hash --- cmake/CPM.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index 1c137853..2d151bf9 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -215,6 +215,16 @@ endfunction() # We will be able to use a shorter path with very high probability, but in the (rare) event that the # first couple characters collide, we will check longer and longer substrings. function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var) + # for compatibility with caches populated by a previous version of CPM, check if a directory using + # the full hash already exists + if(EXISTS "${source_cache_dir}/${origin_hash}") + set(${short_hash_output_var} + "${origin_hash}" + PARENT_SCOPE + ) + return() + endif() + foreach(len RANGE 4 40 4) string(SUBSTRING "${origin_hash}" 0 ${len} short_hash) set(hash_lock ${source_cache_dir}/${short_hash}.lock) From 54f8e10fa9cc251aa906cb816e8dcfac13aabfa6 Mon Sep 17 00:00:00 2001 From: Lars Melchior Date: Sun, 18 May 2025 18:46:28 +0200 Subject: [PATCH 7/8] create temporary file in current binary dir --- test/unit/get_shortest_hash.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/get_shortest_hash.cmake b/test/unit/get_shortest_hash.cmake index bbe6a33a..80df3abe 100644 --- a/test/unit/get_shortest_hash.cmake +++ b/test/unit/get_shortest_hash.cmake @@ -13,7 +13,7 @@ string( # Seconds since epoch string(TIMESTAMP tmpdir_base "%s" UTC) -set(tmp "get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}") +set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}") if(IS_DIRECTORY ${tmp}) message(FATAL_ERROR "Test directory ${tmp} already exists") From 26eba5f999859cc038f0638ef396db51ea84ea52 Mon Sep 17 00:00:00 2001 From: Lars Melchior Date: Sun, 18 May 2025 18:52:54 +0200 Subject: [PATCH 8/8] add test case for legacy hash --- test/unit/get_shortest_hash.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/unit/get_shortest_hash.cmake b/test/unit/get_shortest_hash.cmake index 80df3abe..52bd2e92 100644 --- a/test/unit/get_shortest_hash.cmake +++ b/test/unit/get_shortest_hash.cmake @@ -53,6 +53,11 @@ cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1456" hash) assert_equal(${hash} "cccb77be") assert_contents_equal(${tmp}/cccb77be.hash cccb77be9609d2768ed80dd42cec54f77b1f1456) +# check that legacy hashs are recognized +file(MAKE_DIRECTORY "${tmp}/cccb77be9609d2768ed80dd42cec54f77b1f1457") +cpm_get_shortest_hash(${tmp} "cccb77be9609d2768ed80dd42cec54f77b1f1457" hash) +assert_equal(${hash} "cccb77be9609d2768ed80dd42cec54f77b1f1457") + # 1. The old file should still exist, and have the same content assert_contents_equal(${tmp}/cccb.hash cccb77ae9609d2768ed80dd42cec54f77b1f1455) assert_contents_equal(${tmp}/cccb77ae.hash cccb77ae9609d2768ed80dd42cec54f77b1f1456)