Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

L1 TLB: Add support for configurable L1 TLB (sets/ways) #2574

Merged
merged 2 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class DCacheLogicalTreeNode(dcache: HellaCache, deviceOpt: Option[SimpleDevice],
dataMemorySizeBytes = params.nSets * params.nWays * params.blockBytes,
dataECC = params.dataECC.map(OMECC.fromString),
tagECC = params.tagECC.map(OMECC.fromString),
nTLBEntries = params.nTLBEntries,
nTLBSets = params.nTLBSets,
nTLBWays = params.nTLBWays,
memories = dcache.getOMSRAMs(),
)
)
Expand All @@ -49,7 +50,8 @@ class ICacheLogicalTreeNode(icache: ICache, deviceOpt: Option[SimpleDevice], par
dataMemorySizeBytes = params.nSets * params.nWays * params.blockBytes,
dataECC = params.dataECC.map(OMECC.fromString),
tagECC = params.tagECC.map(OMECC.fromString),
nTLBEntries = params.nTLBEntries,
nTLBSets = params.nTLBSets,
nTLBWays = params.nTLBWays,
maxTimSize = params.nSets * (params.nWays-1) * params.blockBytes,
memories = icache.module.data_arrays.map(_._2),
)
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/diplomaticobjectmodel/model/OMCaches.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ case class OMICache(
dataMemorySizeBytes: Int,
dataECC: Option[OMECC],
tagECC: Option[OMECC],
nTLBEntries: Int,
nTLBSets: Int,
nTLBWays: Int,
maxTimSize: Int,
memories: Seq[OMSRAM],
_types: Seq[String] = Seq("OMICache", "OMCache", "OMDevice", "OMComponent", "OMCompoundType")
Expand All @@ -40,7 +41,8 @@ case class OMDCache(
dataMemorySizeBytes: Int,
dataECC: Option[OMECC],
tagECC: Option[OMECC],
nTLBEntries: Int,
nTLBSets: Int,
nTLBWays: Int,
memories: Seq[OMSRAM],
_types: Seq[String] = Seq("OMDCache", "OMCache", "OMDevice", "OMComponent", "OMCompoundType")
) extends OMCache
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/rocket/DCache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
else ClockGate(clock, clock_en_reg, "dcache_clock_gate")
@chiselName class DCacheModuleImpl extends NoChiselNamePrefix { // entering gated-clock domain

val tlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBEntries, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages)))
val pma_checker = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBEntries, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages)) with InlineInstance)
val tlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBSets, nTLBWays, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages)))
val pma_checker = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBSets, nTLBWays, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages)) with InlineInstance)

// tags
val replacer = cacheParams.replacement
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/rocket/Frontend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
icache.io.clock_enabled := clock_en
withClock (gated_clock) { // entering gated-clock domain

val tlb = Module(new TLB(true, log2Ceil(fetchBytes), TLBConfig(nTLBEntries, outer.icacheParams.nTLBBasePageSectors, outer.icacheParams.nTLBSuperpages)))
val tlb = Module(new TLB(true, log2Ceil(fetchBytes), TLBConfig(nTLBSets, nTLBWays, outer.icacheParams.nTLBBasePageSectors, outer.icacheParams.nTLBSuperpages)))

val s1_valid = Reg(Bool())
val s2_valid = RegInit(false.B)
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/rocket/HellaCache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ case class DCacheParams(
nSets: Int = 64,
nWays: Int = 4,
rowBits: Int = 64,
nTLBEntries: Int = 32,
nTLBSets: Int = 1,
nTLBWays: Int = 32,
nTLBBasePageSectors: Int = 4,
nTLBSuperpages: Int = 4,
tagECC: Option[String] = None,
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/rocket/ICache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ case class ICacheParams(
nSets: Int = 64,
nWays: Int = 4,
rowBits: Int = 128,
nTLBEntries: Int = 32,
nTLBSets: Int = 1,
nTLBWays: Int = 32,
nTLBBasePageSectors: Int = 4,
nTLBSuperpages: Int = 4,
cacheIdBits: Int = 0,
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/rocket/NBDcache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
// check for unsupported operations
assert(!s1_valid || !s1_req.cmd.isOneOf(M_PWR))

val dtlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBEntries)))
val dtlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBSets, nTLBWays)))
io.ptw <> dtlb.io.ptw
dtlb.io.kill := io.cpu.s2_kill
dtlb.io.req.valid := s1_valid && !io.cpu.s1_kill && s1_readwrite
Expand Down
28 changes: 16 additions & 12 deletions src/main/scala/rocket/TLB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boo
}

/** Sizing parameters for a [[TLB]].
  *
  * @param nSets             number of sets in the sectored-entry array; the TLB selects a set
  *                          with VPN bits located just above the low log2(nSectors) bits
  * @param nEntries          translations held by ONE set (not the whole TLB); the TLB
  *                          allocates `nEntries / nSectors` sectored entries per set.
  *                          The cache/frontend call sites pass their `nTLBWays` value here.
  * @param nSectors          sectors per sectored entry (handed to each `TLBEntry`)
  * @param nSuperpageEntries number of entries in the separate superpage array
  */
case class TLBConfig(
nSets: Int,
nEntries: Int,
nSectors: Int = 4,
nSuperpageEntries: Int = 4)
Expand All @@ -159,18 +160,21 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
}

val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
val sectored_entries = Reg(Vec(cfg.nEntries / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false)))
val vpn = io.req.bits.vaddr(vaddrBits-1, pgIdxBits)
val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nEntries / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false))))
val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true)))
val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false)))
def ordinary_entries = sectored_entries ++ superpage_entries
def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries
def all_entries = ordinary_entries ++ special_entry
def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry

val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
val state = Reg(init=s_ready)
val r_refill_tag = Reg(UInt(width = vpnBits))
val r_superpage_repl_addr = Reg(UInt(log2Ceil(superpage_entries.size).W))
val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries.size).W))
val r_sectored_hit_addr = Reg(UInt(log2Ceil(sectored_entries.size).W))
val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries(0).size).W))
val r_sectored_hit_addr = Reg(UInt(log2Ceil(sectored_entries(0).size).W))
val r_sectored_hit = Reg(Bool())

val priv = if (instruction) io.ptw.status.prv else io.ptw.status.dprv
Expand All @@ -179,7 +183,6 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough

// share a single physical memory attribute checker (unshare if critical path)
val vpn = io.req.bits.vaddr(vaddrBits-1, pgIdxBits)
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
val do_refill = Bool(usingVM) && io.ptw.resp.valid
val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid
Expand All @@ -206,7 +209,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x
val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType)

val sector_hits = sectored_entries.map(_.sectorHit(vpn))
val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn))
val superpage_hits = superpage_entries.map(_.hit(vpn))
val hitsVec = all_entries.map(vm_enabled && _.hit(vpn))
val real_hits = hitsVec.asUInt
Expand Down Expand Up @@ -245,8 +248,9 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
when (invalidate_refill) { e.invalidate() }
}
}.otherwise {
val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
val waddr = Mux(r_sectored_hit, r_sectored_hit_addr, r_sectored_repl_addr)
for ((e, i) <- sectored_entries.zipWithIndex) when (waddr === i) {
for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when (waddr === i) {
when (!r_sectored_hit) { e.invalidate() }
e.insert(r_refill_tag, 0.U, newEntry)
when (invalidate_refill) { e.invalidate() }
Expand Down Expand Up @@ -320,10 +324,10 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
val tlb_hit = real_hits.orR
val tlb_miss = vm_enabled && !bad_va && !tlb_hit

val sectored_plru = new PseudoLRU(sectored_entries.size)
// Size the replacement policy by the number of sectored entries that actually
// exist in one set (cfg.nEntries / cfg.nSectors), not by cfg.nEntries: the
// hit index passed to access() and the r_sectored_repl_addr register are both
// sized from sectored_entries(0).size, so the policy must track the same ways.
val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries(0).size, "plru")
val superpage_plru = new PseudoLRU(superpage_entries.size)
when (io.req.valid && vm_enabled) {
when (sector_hits.orR) { sectored_plru.access(OHToUInt(sector_hits)) }
when (sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) }
when (superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) }
}

Expand Down Expand Up @@ -361,7 +365,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
r_refill_tag := vpn

r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way)
r_sectored_repl_addr := replacementEntry(sectored_entries, sectored_plru.way)
r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
r_sectored_hit_addr := OHToUInt(sector_hits)
r_sectored_hit := sector_hits.orR
}
Expand All @@ -379,14 +383,14 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T

when (sfence) {
assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn)
for (e <- all_entries) {
for (e <- all_real_entries) {
when (io.sfence.bits.rs1) { e.invalidateVPN(vpn) }
.elsewhen (io.sfence.bits.rs2) { e.invalidateNonGlobal() }
.otherwise { e.invalidate() }
}
}
when (multipleHits || reset) {
all_entries.foreach(_.invalidate())
all_real_entries.foreach(_.invalidate())
}

ccover(io.ptw.req.fire(), "MISS", "TLB miss")
Expand Down
18 changes: 12 additions & 6 deletions src/main/scala/subsystem/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,16 @@ class WithNMedCores(n: Int, overrideIdOffset: Option[Int] = None) extends Config
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
nMSHRs = 0,
blockBytes = site(CacheBlockBytes))),
icache = Some(ICacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
blockBytes = site(CacheBlockBytes))))
List.tabulate(n)(i => med.copy(hartId = i + idOffset)) ++ prev
}
Expand All @@ -133,14 +135,16 @@ class WithNSmallCores(n: Int, overrideIdOffset: Option[Int] = None) extends Conf
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
nMSHRs = 0,
blockBytes = site(CacheBlockBytes))),
icache = Some(ICacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
blockBytes = site(CacheBlockBytes))))
List.tabulate(n)(i => small.copy(hartId = i + idOffset)) ++ prev
}
Expand All @@ -158,15 +162,17 @@ class With1TinyCore extends Config((site, here, up) => {
rowBits = site(SystemBusKey).beatBits,
nSets = 256, // 16Kb scratchpad
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
nMSHRs = 0,
blockBytes = site(CacheBlockBytes),
scratch = Some(0x80000000L))),
icache = Some(ICacheParams(
rowBits = site(SystemBusKey).beatBits,
nSets = 64,
nWays = 1,
nTLBEntries = 4,
nTLBSets = 1,
nTLBWays = 4,
blockBytes = site(CacheBlockBytes)))))
case RocketCrossingKey => List(RocketCrossingParams(
crossingType = SynchronousCrossing(),
Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/tile/BaseTile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,12 @@ trait HasNonDiplomaticTileParameters {
).getOrElse(Nil)

val dtlb = tileParams.dcache.filter(_ => tileParams.core.useVM).map(d => Map(
"d-tlb-size" -> d.nTLBEntries.asProperty,
"d-tlb-sets" -> 1.asProperty)).getOrElse(Nil)
"d-tlb-size" -> (d.nTLBWays * d.nTLBSets).asProperty,
"d-tlb-sets" -> d.nTLBSets.asProperty)).getOrElse(Nil)

val itlb = tileParams.icache.filter(_ => tileParams.core.useVM).map(i => Map(
"i-tlb-size" -> i.nTLBEntries.asProperty,
"i-tlb-sets" -> 1.asProperty)).getOrElse(Nil)
"i-tlb-size" -> (i.nTLBWays * i.nTLBSets).asProperty,
"i-tlb-sets" -> i.nTLBSets.asProperty)).getOrElse(Nil)

val mmu = if (!tileParams.core.useVM) Nil else Map(
"tlb-split" -> Nil,
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/tile/L1Cache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ trait L1CacheParams {
def nSets: Int
def nWays: Int
def rowBits: Int
def nTLBEntries: Int
def nTLBSets: Int
def nTLBWays: Int
def blockBytes: Int // TODO this is ignored in favor of p(CacheBlockBytes) in BaseTile
}

Expand All @@ -31,7 +32,8 @@ trait HasL1CacheParameters extends HasTileParameters {
def rowBits = cacheParams.rowBits
def rowBytes = rowBits/8
def rowOffBits = log2Up(rowBytes)
def nTLBEntries = cacheParams.nTLBEntries
def nTLBSets = cacheParams.nTLBSets
def nTLBWays = cacheParams.nTLBWays

def cacheDataBits = tlBundleParams.dataBits
def cacheDataBytes = cacheDataBits / 8
Expand Down
22 changes: 22 additions & 0 deletions src/main/scala/util/Replacement.scala
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ abstract class SeqReplacementPolicy {
def way: UInt
}

/** Interface for a replacement policy that is addressed per set.
  *
  * Both operations take the set index as a hardware `UInt`, so a single policy
  * object can serve every set of a set-associative structure.
  */
abstract class SetAssocReplacementPolicy {
/** Record that `touch_way` of set `set` was used. */
def access(set: UInt, touch_way: UInt): Unit
/** Way the policy currently selects for replacement in set `set`. */
def way(set: UInt): UInt
}

class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
val logic = new RandomReplacement(n_ways)
def access(set: UInt) = { }
Expand Down Expand Up @@ -283,6 +288,23 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
def way = plru_way
}


/** Set-associative replacement policy that keeps one replacement-state word per set.
  *
  * A single policy object (`logic`) supplies the state width and the
  * next-state / victim-selection functions; the per-set state itself is held
  * in `state_vec`.
  *
  * @param n_sets number of sets, i.e. number of state words in `state_vec`
  * @param n_ways number of ways handed to the underlying policy
  * @param policy policy name, matched case-insensitively: "plru" or "lru"
  * @throws IllegalArgumentException if `policy` is neither "plru" nor "lru"
  */
class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy {
// Select the state-transition logic by name; the name is lower-cased before matching.
val logic = policy.toLowerCase match {
case "plru" => new PseudoLRU(n_ways)
case "lru" => new TrueLRU(n_ways)
case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
}
// One state word of logic.nBits bits per set.
// NOTE(review): declared with Reg(...) and no reset value is given here — confirm
// whether users of this class need the state to be defined after reset.
val state_vec = Reg(Vec(n_sets, UInt(logic.nBits.W)))

/** Replace the state of `set` with the next state computed from its current state and `touch_way`. */
def access(set: UInt, touch_way: UInt) = {
state_vec(set) := logic.get_next_state(state_vec(set), touch_way)
}

/** Replacement way derived from the current state of `set`. */
def way(set: UInt) = logic.get_replace_way(state_vec(set))

}

/** Synthesizeable unit tests */
import freechips.rocketchip.unittest._

Expand Down