-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[DA] Fix the check between Subscript and Size after delinearization #151326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
14aced9
7fdb37d
17eba49
91c1ded
cc091b4
434c2d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1126,17 +1126,32 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { | |
Size = SE->getTruncateOrZeroExtend(Size, MaxType); | ||
|
||
// Special check for addrecs using BE taken count | ||
const SCEV *Bound = SE->getMinusSCEV(S, Size); | ||
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { | ||
if (AddRec->isAffine()) { | ||
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) | ||
if (AddRec->isAffine() && AddRec->hasNoSignedWrap() && | ||
AddRec->hasNoUnsignedWrap()) { | ||
const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); | ||
if (!isa<SCEVCouldNotCompute>(BECount)) { | ||
const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); | ||
if (SE->isKnownNegative(Limit)) | ||
return true; | ||
} | ||
const SCEV *Start = AddRec->getStart(); | ||
const SCEV *Step = AddRec->getStepRecurrence(*SE); | ||
const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE); | ||
const SCEV *Diff0 = SE->getMinusSCEV(Start, Size); | ||
const SCEV *Diff1 = SE->getMinusSCEV(End, Size); | ||
|
||
// If the value of Step is non-negative and the AddRec is non-wrap, it | ||
// reaches its maximum at the last iteration. So it's enouth to check | ||
// whether End - Size is negative. | ||
if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case, it might be sufficient to ensure that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If it is not nsw, shouldn't these SCEVs be SCEVCouldNotCompute anyway? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
As I tried with Anyway, nsw seems sufficient in this case. |
||
return true; | ||
|
||
// If the value of Step is non-positive and the AddRec is non-wrap, the | ||
// initial value is its maximum. | ||
if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0)) | ||
return true; | ||
|
||
// Even if we don't know the sign of Step, either Start or End must be | ||
// the maximum value of the AddRec since it is non-wrap. | ||
if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1)) | ||
return true; | ||
} | ||
} | ||
|
||
// Check using normal isKnownNegative | ||
const SCEV *LimitedBound = | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -594,14 +594,15 @@ for.end12: ; preds = %for.inc10, %entry | |
} | ||
|
||
|
||
; FIXME? It seems that we cannot prove that %N is non-negative... | ||
define void @nonnegative(ptr nocapture %A, i32 %N) { | ||
; CHECK-LABEL: 'nonnegative' | ||
; CHECK-NEXT: Src: store i32 1, ptr %arrayidx, align 4 --> Dst: store i32 1, ptr %arrayidx, align 4 | ||
; CHECK-NEXT: da analyze - none! | ||
; CHECK-NEXT: da analyze - output [* *]! | ||
; CHECK-NEXT: Src: store i32 1, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4 | ||
; CHECK-NEXT: da analyze - consistent output [0 0|<]! | ||
; CHECK-NEXT: da analyze - output [* *|<]! | ||
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4 | ||
; CHECK-NEXT: da analyze - none! | ||
; CHECK-NEXT: da analyze - output [* *]! | ||
Comment on lines
+597
to
+605
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not confident whether this change is valid... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The failure test seems to be the same case as this. If I'm correct,
So, in this specific case,
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have to debug this to find out. Might have to do with At least the change is only more pessimistic, so still correct. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suspect the original result is incorrect, we cannot prove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The original result is correct in the sense that %N can be larger than INT32_MAX which woud indeed be negative in a signed interpretation. But do we want to only consider signed loop indiction variables? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Since So the store 1 -> store 1 self-dependency might be something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My concern to the assumption that values are always signed. The only indicator for wrapping behavior are Every single Assuming and i8 SCEV:
so
If we wanted to exploit the fact that memory blocks larger than half the address space might be undefined behavior (due to I seems doubious to me that This PR at least adds a check for the AddRec having signed semantics. But I am considering just LGTM since it clearly fixes a bug. Cannot fix all the pre-existing corner cases. Some idea for rigerous/systematic testing would be to use loops with a single iteration only (in two different loops; so AddRec resolves to a constant), or with two iterations, one before the (unsigned and/or signed) wrap and one below. With so just a single iteration it is easier to reason about and SCEV less often has to fall back to SCEVCouldNotCompute. Instead of comparing just the first/last value of the array index SCEV, one might possibly (also) need to recursively visit the SCEV tyo analyze whether each of its constituents is monotonic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I couldn't agree more. I believe this patch fixes some bugs, but not sure if this is a foundamental solution. And probably the similar bugs exist in other places of DA as well... With my earlier comment, "all values should be interpreted as signed", what I meant was that we should proceed with the analysis only if we can prove that none of the subscripts overflow in the signed sense. If we encounter a value that can overflow (e.g., transitioning from Also, I think we need to pay more attention to the SCEV computation (like There is one bit of good news; regarding an AddRec used as an argument of GEP, maybe we can reason about wrapping behavior it if the GEP has
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thats exatly what I mentioned as well. I think "non-wrap" of the subscript has to be replaced with a notion of monotonocity of the SCEV expression.
DA does not support loop versioning like LAA does, we cannot require additional checks. I aslo don't think it is even possible to find such an expression to figure out whether GEP results in poison. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Hm, it looks reasonable to me for now.
I didn't mean inserting additional checks, I'm considering the case where the input IR has such a condition. DA reasons about something based on the GEP flags (example), but violating these flags doesn't trigger immediate UB. It actually triggered when it is used as, e.g.. pointer argument for load/store. I'm now wondering if this inference is only allowed when the value of the GEP is guaranteed to be used. |
||
; | ||
entry: | ||
%cmp44 = icmp eq i32 %N, 0 | ||
|
@@ -630,3 +631,78 @@ for.latch: | |
exit: | ||
ret void | ||
} | ||
|
||
; i = 0; | ||
; do { | ||
; a[k * i] = 42; | ||
; a[k * (i + 1)] = 42; | ||
; i++; | ||
; } while (i < k); | ||
kasuga-fj marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; | ||
; The dependency direction between the two stores depends on the sign of k. | ||
; FIXME: Each store has loop-carried dependencies on itself if k is zero. | ||
; | ||
define void @coeff_may_negative(ptr %a, i32 %k) { | ||
; CHECK-LABEL: 'coeff_may_negative' | ||
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1 | ||
; CHECK-NEXT: da analyze - none! | ||
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1 | ||
; CHECK-NEXT: da analyze - consistent output [-1]! | ||
kasuga-fj marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; CHECK-NEXT: Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1 | ||
; CHECK-NEXT: da analyze - none! | ||
; | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ] | ||
%i.next = add i32 %i, 1 | ||
%subscript.0 = mul i32 %i, %k | ||
%subscript.1 = mul i32 %i.next, %k | ||
%idx.0 = getelementptr i8, ptr %a, i32 %subscript.0 | ||
%idx.1 = getelementptr i8, ptr %a, i32 %subscript.1 | ||
store i8 42, ptr %idx.0 | ||
store i8 42, ptr %idx.1 | ||
%cond.exit = icmp eq i32 %i.next, %k | ||
br i1 %cond.exit, label %exit, label %loop | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
; i = 0; | ||
; do { | ||
; a[k * i] = 42; | ||
; a[k * (i + 1)] = 42; | ||
; i++; | ||
; } while (i < k); | ||
; | ||
; We can infer that the value of k is non-negative from the nsw flag. | ||
; | ||
define void @coeff_positive(ptr %a, i32 %k) { | ||
; CHECK-LABEL: 'coeff_positive' | ||
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1 | ||
; CHECK-NEXT: da analyze - none! | ||
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1 | ||
; CHECK-NEXT: da analyze - consistent output [-1]! | ||
; CHECK-NEXT: Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1 | ||
; CHECK-NEXT: da analyze - none! | ||
; | ||
entry: | ||
br label %loop | ||
|
||
loop: | ||
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ] | ||
%i.next = add nsw i32 %i, 1 | ||
%subscript.0 = mul i32 %i, %k | ||
%subscript.1 = mul i32 %i.next, %k | ||
%idx.0 = getelementptr i8, ptr %a, i32 %subscript.0 | ||
%idx.1 = getelementptr i8, ptr %a, i32 %subscript.1 | ||
store i8 42, ptr %idx.0 | ||
store i8 42, ptr %idx.1 | ||
%cond.exit = icmp eq i32 %i.next, %k | ||
br i1 %cond.exit, label %exit, label %loop | ||
|
||
exit: | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although this may not be directly related to the change, this might be incorrect, as
Size
can be negative at this point.