Skip to content

Commit a451ff0

Browse files
authored
[llvm][cas] Improve UnifiedOnDiskActionCache validation to check cas refs (#171732)
Check that action cache references point to valid CAS objects by ensuring they are contained within the corresponding CAS and also that the offsets match. This prevents accidentally referencing "dead" index records that were not properly flushed to disk, which can lead to the action cache pointing to the wrong data or to garbage data. rdar://126642956
1 parent 8471f36 commit a451ff0

File tree

4 files changed

+44
-26
lines changed

4 files changed

+44
-26
lines changed

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -279,16 +279,29 @@ class OnDiskGraphDB {
279279
///
280280
/// Returns \p nullopt if the object is not stored in this CAS.
281281
LLVM_ABI_FOR_TEST std::optional<ObjectID>
282-
getExistingReference(ArrayRef<uint8_t> Digest);
282+
getExistingReference(ArrayRef<uint8_t> Digest, bool CheckUpstream = true);
283283

284284
/// Check whether the object associated with \p Ref is stored in the CAS.
285285
/// Note that this function will fault-in according to the policy.
286286
Expected<bool> isMaterialized(ObjectID Ref);
287287

288288
/// Check whether the object associated with \p Ref is stored in the CAS.
289289
/// Note that this function does not fault-in.
290-
bool containsObject(ObjectID Ref) const {
291-
return containsObject(Ref, /*CheckUpstream=*/true);
290+
bool containsObject(ObjectID Ref, bool CheckUpstream = true) const {
291+
auto Presence = getObjectPresence(Ref, CheckUpstream);
292+
if (!Presence) {
293+
consumeError(Presence.takeError());
294+
return false;
295+
}
296+
switch (*Presence) {
297+
case ObjectPresence::Missing:
298+
return false;
299+
case ObjectPresence::InPrimaryDB:
300+
return true;
301+
case ObjectPresence::OnlyInUpstreamDB:
302+
return true;
303+
}
304+
llvm_unreachable("Unknown ObjectPresence enum");
292305
}
293306

294307
/// \returns the data part of the provided object handle.
@@ -370,24 +383,6 @@ class OnDiskGraphDB {
370383
LLVM_ABI_FOR_TEST Expected<ObjectPresence>
371384
getObjectPresence(ObjectID Ref, bool CheckUpstream) const;
372385

373-
/// \returns true if object can be found in database.
374-
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
375-
auto Presence = getObjectPresence(Ref, CheckUpstream);
376-
if (!Presence) {
377-
consumeError(Presence.takeError());
378-
return false;
379-
}
380-
switch (*Presence) {
381-
case ObjectPresence::Missing:
382-
return false;
383-
case ObjectPresence::InPrimaryDB:
384-
return true;
385-
case ObjectPresence::OnlyInUpstreamDB:
386-
return true;
387-
}
388-
llvm_unreachable("Unknown ObjectPresence enum");
389-
}
390-
391386
/// When \p load is called for a node that doesn't exist, this function tries
392387
/// to load it from the upstream store and copy it to the primary one.
393388
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);

llvm/lib/CAS/ActionCaches.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,17 +235,31 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
235235
}
236236

237237
Error UnifiedOnDiskActionCache::validate() const {
238-
auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
238+
auto ValidateRef = [this](FileOffset Offset, ArrayRef<char> Value) -> Error {
239239
auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
240240
auto formatError = [&](Twine Msg) {
241241
return createStringError(
242242
llvm::errc::illegal_byte_sequence,
243243
"bad record at 0x" +
244-
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
245-
Msg.str());
244+
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) +
245+
" ref=0x" + utohexstr(ID.getOpaqueData(), /*LowerCase=*/true) +
246+
": " + Msg.str());
246247
};
247248
if (ID.getOpaqueData() == 0)
248249
return formatError("zero is not a valid ref");
250+
// Check containsObject first, because other API assumes a valid ObjectID.
251+
if (!UniDB->getGraphDB().containsObject(ID, /*CheckUpstream=*/false))
252+
return formatError("ref is not in cas index");
253+
auto Hash = UniDB->getGraphDB().getDigest(ID);
254+
auto Ref =
255+
UniDB->getGraphDB().getExistingReference(Hash, /*CheckUpstream=*/false);
256+
assert(Ref && "missing object passed containsObject check?");
257+
if (!Ref)
258+
return formatError("ref is not in cas index after contains");
259+
if (*Ref != ID)
260+
return formatError("ref does not match indexed offset " +
261+
utohexstr(Ref->getOpaqueData(), /*LowerCase=*/true) +
262+
" for hash " + toHex(Hash));
249263
return Error::success();
250264
};
251265
return UniDB->getKeyValueDB().validate(ValidateRef);

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,10 +1109,11 @@ ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
11091109
}
11101110

11111111
std::optional<ObjectID>
1112-
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
1112+
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest,
1113+
bool CheckUpstream) {
11131114
auto tryUpstream =
11141115
[&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1115-
if (!UpstreamDB)
1116+
if (!CheckUpstream || !UpstreamDB)
11161117
return std::nullopt;
11171118
std::optional<ObjectID> UpstreamID =
11181119
UpstreamDB->getExistingReference(Digest);

llvm/test/tools/llvm-cas/validation.test

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ RUN: --data - >%t/abc.casid
2525

2626
RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
2727
RUN: llvm-cas --cas %t/ac --validate
28+
29+
# Check that validation fails if the objects referenced are missing.
30+
RUN: mv %t/ac/v1.1/index.v1 %t/tmp.index.v1
31+
RUN: not llvm-cas --cas %t/ac --validate
32+
33+
RUN: mv %t/tmp.index.v1 %t/ac/v1.1/index.v1
34+
RUN: llvm-cas --cas %t/ac --validate
35+
2836
# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
2937
# allocated record, leaving it invalid.
3038
RUN: truncate -s -40 %t/ac/v1.1/actions.v1

0 commit comments

Comments
 (0)