-
Notifications
You must be signed in to change notification settings - Fork 4
Top-Down Methodology #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: fdp-develop
Are you sure you want to change the base?
Changes from 4 commits
f02d950
4369171
bc9b6df
b65ac34
913ba59
1193503
8ea14d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -355,7 +355,8 @@ CPU::CPUStats::CPUStats(CPU *cpu) | |
"to idling"), | ||
ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), | ||
"Total number of cycles that CPU has spent quiesced or waiting " | ||
"for an interrupt") | ||
"for an interrupt"), | ||
topDownStats(cpu) | ||
{ | ||
// Register any of the O3CPU's stats here. | ||
timesIdled | ||
|
@@ -368,6 +369,153 @@ CPU::CPUStats::CPUStats(CPU *cpu) | |
.prereq(quiesceCycles); | ||
} | ||
|
||
// Top-level Top-Down Methodology statistics group. | ||
// Registers a statistics group named "TopDownStats" with `cpu` as its parent | ||
// and constructs the four member sub-groups (topDownL1, topDownFbL2, | ||
// topDownBbL2, topDownBbMem), handing each the same CPU pointer. The | ||
// constructor body is intentionally empty. | ||
// NOTE(review): the sub-groups are given `cpu`, not this group, so they are | ||
// presumably parented to the CPU rather than nested under "TopDownStats" -- | ||
// confirm that this is the intended stats hierarchy. | ||
CPU::CPUStats::TopDownStats::TopDownStats(CPU *cpu) | ||
: statistics::Group(cpu, "TopDownStats"), topDownL1(cpu), topDownFbL2(cpu), | ||
topDownBbL2(cpu), topDownBbMem(cpu) {} | ||
|
||
// Level-1 Top-Down breakdown. | ||
// Registers the group "TopDownL1" under `cpu` with four Count/Count rate stats | ||
// (frontendBound, badSpeculation, backendBound, retiring) and then defines | ||
// each of them as an expression over stats owned by the decode, rename and | ||
// commit stages and over baseStats.numCycles. | ||
// Every ratio below uses the same denominator, rename width * numCycles; the | ||
// stat descriptions call the resulting unit "slots". | ||
CPU::CPUStats::TopDownStats::TopDownL1::TopDownL1(CPU *cpu) | ||
: statistics::Group(cpu, "TopDownL1"), | ||
ADD_STAT(frontendBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Frontend Bound, fraction of slots lost due to frontend " | ||
"undersupplying the backend"), | ||
ADD_STAT(badSpeculation, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Bad Speculation, fraction of slots lost due to mispeculation"), | ||
ADD_STAT(backendBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Backend Bound, fraction of slots lost due to backend resource " | ||
"constraints."), | ||
ADD_STAT( | ||
retiring, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Retiring, fraction of slots successfully retired by the backend") { | ||
// L1: frontendBound = decode fetchBubbles / (rename width * numCycles). | ||
frontendBound = cpu->decode.getStats().fetchBubbles / | ||
(cpu->rename.getWidth() * cpu->baseStats.numCycles); | ||
|
||
// badSpeculation = (renamedInsts - committedInst | ||
// + (recoveryBubblesMissprediction + recoveryBubblesMemoryNuke) | ||
// * rename width) / (rename width * numCycles). | ||
// The two recovery counters are scaled by the rename width before being | ||
// added; presumably they are counted in cycles -- TODO confirm in commit. | ||
badSpeculation = (cpu->rename.getStats().renamedInsts - | ||
cpu->commit.getStats().committedInst + | ||
(cpu->commit.getStats().recoveryBubblesMissprediction + | ||
cpu->commit.getStats().recoveryBubblesMemoryNuke) | ||
* cpu->rename.getWidth()) / | ||
(cpu->rename.getWidth() * cpu->baseStats.numCycles); | ||
|
||
// retiring = committedInst / (rename width * numCycles). | ||
retiring = cpu->commit.getStats().committedInst / | ||
(cpu->rename.getWidth() * cpu->baseStats.numCycles); | ||
|
||
// backendBound is not measured directly: it is the remainder after the | ||
// other three categories are subtracted from 1. | ||
backendBound = 1 - (frontendBound + badSpeculation + retiring); | ||
} | ||
|
||
// Level-2 breakdown of the frontend-bound category. | ||
// Registers the group "TopDownL2_FrontendBound" under `cpu` with two | ||
// Count/Count rate stats, fetchLatency and fetchBandwidth, and defines them | ||
// from decode's fetchBubblesMax counter, baseStats.numCycles and the level-1 | ||
// frontendBound stat. | ||
// NOTE(review): this constructor reads | ||
// cpu->cpuStats.topDownStats.topDownL1.frontendBound while cpuStats is | ||
// presumably still being constructed; it relies on topDownL1 being declared | ||
// (and therefore initialized) before this group -- confirm in the header. | ||
CPU::CPUStats::TopDownStats::TopDownFrontendBoundL2::TopDownFrontendBoundL2( | ||
CPU *cpu) | ||
: statistics::Group(cpu, "TopDownL2_FrontendBound"), | ||
ADD_STAT(fetchLatency, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Fetch Latency Bound, frontend stalls due to instruction cache " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its not only instruction cache but also TLB and BTB |
||
"inefficiency"), | ||
ADD_STAT(fetchBandwidth, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Fetch Bandwidth Bound, frontend stalls due to decoder " | ||
"inefficiency") { | ||
// Frontend L2 | ||
// fetchLatency = decode fetchBubblesMax / numCycles. Unlike the level-1 | ||
// ratios, the denominator here is not multiplied by the rename width. | ||
fetchLatency = | ||
cpu->decode.getStats().fetchBubblesMax / (cpu->baseStats.numCycles); | ||
// fetchBandwidth is the remainder of the level-1 frontendBound once | ||
// fetchLatency has been subtracted. | ||
fetchBandwidth = | ||
cpu->cpuStats.topDownStats.topDownL1.frontendBound - fetchLatency; | ||
} | ||
|
||
// CPU::CPUStats::TopDownStats::TopDownFrontendBoundL2::TopDownFrontendBoundL2(CPU | ||
|
||
// *cpu) | ||
// : statistics::Group(cpu, "TopDownL2_FrontendBound"), | ||
// ADD_STAT(fetchLatency, | ||
// statistics::units::Rate<statistics::units::Count, | ||
// statistics::units::Count>::get(), | ||
// "Fetch Latency Bound, frontend stalls due to instruction cache | ||
// inefficiency"), | ||
// ADD_STAT(fetchBandwidth, | ||
// statistics::units::Rate<statistics::units::Count, | ||
// statistics::units::Count>::get(), | ||
// "Fetch Bandwidth Bound, frontend stalls due to decoder | ||
// inefficiency") | ||
// { | ||
// // Frontend L2 | ||
// fetchLatency = cpu->decode.getStats().fetchBubblesMax / | ||
// (cpu->baseStats.numCycles); fetchBandwidth = | ||
// cpu->cpuStats.topDownStats.topDownL1.frontendBound - fetchLatency; | ||
// } | ||
|
||
// Level-2 breakdown of the backend-bound category. | ||
// Registers the group "TopDownL2_BackendBound" under `cpu` with two | ||
// Count/Count rate stats, memoryBound and coreBound, and defines them from | ||
// instruction-queue and rename counters divided by baseStats.numCycles. | ||
// NOTE(review): executionStalls is assigned below but, unlike memoryBound and | ||
// coreBound, is not registered through ADD_STAT in this initializer list; | ||
// presumably it is a member declared in the header and used only as an | ||
// intermediate term -- confirm. | ||
CPU::CPUStats::TopDownStats::TopDownBackendBoundL2::TopDownBackendBoundL2( | ||
CPU *cpu) | ||
: statistics::Group(cpu, "TopDownL2_BackendBound"), | ||
ADD_STAT(memoryBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Memory Bound, backend stalls due to memory subsystem"), | ||
ADD_STAT( | ||
coreBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Core Bound, backend stalls due to functional unit constraints") { | ||
// Backend L2 | ||
// executionStalls = (numInstsExec0 - rename idleCycles + numInstsExec1 | ||
// + numInstsExec2) / numCycles. | ||
executionStalls = (cpu->iew.instQueue.getStats().numInstsExec0 - | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not properly indented. Should be consistent. 4 spaces. |
||
cpu->rename.getStats().idleCycles + | ||
cpu->iew.instQueue.getStats().numInstsExec1 + | ||
cpu->iew.instQueue.getStats().numInstsExec2) / | ||
(cpu->baseStats.numCycles); | ||
// Earlier variant kept for reference; it used rename's SQFullEvents where | ||
// the live code below uses storeStalls: | ||
// memoryBound = (cpu->iew.instQueue.getStats().loadStallCycles + | ||
// cpu->rename.getStats().SQFullEvents) / (cpu->baseStats.numCycles); | ||
// memoryBound = (loadStallCycles + rename storeStalls) / numCycles. | ||
memoryBound = (cpu->iew.instQueue.getStats().loadStallCycles + | ||
cpu->rename.getStats().storeStalls) / | ||
(cpu->baseStats.numCycles); | ||
// coreBound is whatever part of executionStalls is not memoryBound. | ||
coreBound = executionStalls - memoryBound; | ||
} | ||
|
||
// Level-3 breakdown of the memory-bound part of the backend-bound category. | ||
// Registers the group "TopDownL3_BackendBound_MemoryBound" under `cpu` with | ||
// five Count/Count rate stats (l1Bound, l2Bound, l3Bound, extMemBound, | ||
// storeBound). Each is an instruction-queue or rename counter expression | ||
// divided by baseStats.numCycles. | ||
CPU::CPUStats::TopDownStats::TopDownBackendBoundL3::TopDownBackendBoundL3( | ||
CPU *cpu) | ||
: statistics::Group(cpu, "TopDownL3_BackendBound_MemoryBound"), | ||
ADD_STAT(l1Bound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"L1 Cache Bound"), | ||
ADD_STAT(l2Bound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"L2 Cache Bound"), | ||
ADD_STAT(l3Bound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"L3 Cache Bound"), | ||
ADD_STAT(extMemBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"External Memory Bound"), | ||
ADD_STAT(storeBound, | ||
statistics::units::Rate<statistics::units::Count, | ||
statistics::units::Count>::get(), | ||
"Store Bound") { | ||
// Backend Bound / Memory Bound L3 | ||
// The three cache-level bounds are differences of successive counters: | ||
// l1Bound = (loadStallCycles - L1miss) / numCycles | ||
// l2Bound = (L1miss - L2miss) / numCycles | ||
// l3Bound = (L2miss - L3miss) / numCycles | ||
// NOTE(review): presumably the L*miss counters are in cycles like | ||
// loadStallCycles so that the subtractions are unit-consistent -- confirm. | ||
l1Bound = (cpu->iew.instQueue.getStats().loadStallCycles - | ||
|
||
cpu->iew.instQueue.getStats().L1miss) / | ||
(cpu->baseStats.numCycles); | ||
l2Bound = (cpu->iew.instQueue.getStats().L1miss - | ||
cpu->iew.instQueue.getStats().L2miss) / | ||
(cpu->baseStats.numCycles); | ||
l3Bound = (cpu->iew.instQueue.getStats().L2miss - | ||
cpu->iew.instQueue.getStats().L3miss) / | ||
(cpu->baseStats.numCycles); | ||
// extMemBound = L3miss / numCycles. | ||
extMemBound = | ||
(cpu->iew.instQueue.getStats().L3miss) / (cpu->baseStats.numCycles); | ||
// storeBound = rename storeStalls / numCycles. | ||
storeBound = | ||
(cpu->rename.getStats().storeStalls) / (cpu->baseStats.numCycles); | ||
} | ||
|
||
void | ||
CPU::tick() | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,18 +158,25 @@ Decode::DecodeStats::DecodeStats(CPU *cpu) | |
ADD_STAT(decodedInsts, statistics::units::Count::get(), | ||
"Number of instructions handled by decode"), | ||
ADD_STAT(squashedInsts, statistics::units::Count::get(), | ||
"Number of squashed instructions handled by decode") | ||
{ | ||
idleCycles.prereq(idleCycles); | ||
blockedCycles.prereq(blockedCycles); | ||
runCycles.prereq(runCycles); | ||
unblockCycles.prereq(unblockCycles); | ||
squashCycles.prereq(squashCycles); | ||
branchResolved.prereq(branchResolved); | ||
branchMispred.prereq(branchMispred); | ||
controlMispred.prereq(controlMispred); | ||
decodedInsts.prereq(decodedInsts); | ||
squashedInsts.prereq(squashedInsts); | ||
"Number of squashed instructions handled by decode"), | ||
ADD_STAT(fetchBubbles, statistics::units::Count::get(), | ||
"Stat for Top-Down Methodology, number of instructions not " | ||
"delivered to backend"), | ||
ADD_STAT(fetchBubblesMax, statistics::units::Count::get(), | ||
"Stat for Top-Down Methodology, number of cycles in which no " | ||
"instructions are delivered to backend") { | ||
idleCycles.prereq(idleCycles); | ||
|
||
blockedCycles.prereq(blockedCycles); | ||
runCycles.prereq(runCycles); | ||
unblockCycles.prereq(unblockCycles); | ||
squashCycles.prereq(squashCycles); | ||
branchResolved.prereq(branchResolved); | ||
branchMispred.prereq(branchMispred); | ||
controlMispred.prereq(controlMispred); | ||
decodedInsts.prereq(decodedInsts); | ||
squashedInsts.prereq(squashedInsts); | ||
fetchBubbles.prereq(fetchBubbles); | ||
fetchBubblesMax.prereq(fetchBubblesMax); | ||
} | ||
|
||
void | ||
|
@@ -565,6 +572,8 @@ Decode::tick() | |
|
||
toRenameIndex = 0; | ||
|
||
fetchBubbles = decodeWidth; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks incorrect. Edit: I now understand what you are doing, but I find it very unintuitive. Actually, the variable is redundant. |
||
|
||
list<ThreadID>::iterator threads = activeThreads->begin(); | ||
list<ThreadID>::iterator end = activeThreads->end(); | ||
|
||
|
@@ -578,6 +587,10 @@ Decode::tick() | |
status_change = checkSignalsAndUpdate(tid) || status_change; | ||
|
||
decode(status_change, tid); | ||
|
||
stats.fetchBubbles += fetchBubbles; | ||
if (fetchBubbles == decodeWidth) | ||
stats.fetchBubblesMax++; | ||
} | ||
|
||
if (status_change) { | ||
|
@@ -602,9 +615,11 @@ Decode::decode(bool &status_change, ThreadID tid) | |
// check if stall conditions have passed | ||
|
||
if (decodeStatus[tid] == Blocked) { | ||
++stats.blockedCycles; | ||
fetchBubbles -= decodeWidth; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indentation |
||
++stats.blockedCycles; | ||
} else if (decodeStatus[tid] == Squashing) { | ||
++stats.squashCycles; | ||
fetchBubbles -= decodeWidth; | ||
++stats.squashCycles; | ||
} | ||
|
||
// Decode should try to decode as many instructions as its bandwidth | ||
|
@@ -702,6 +717,7 @@ Decode::decodeInsts(ThreadID tid) | |
++toRenameIndex; | ||
++stats.decodedInsts; | ||
--insts_available; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The same thing appears a few lines above. Shouldn't you also put it there? |
||
--fetchBubbles; | ||
|
||
#if TRACING_ON | ||
if (debug::O3PipeView) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you format it as done above?