Skip to content

Commit

Permalink
TLB/PTW: add htval/mtval2 support
Browse files Browse the repository at this point in the history
  • Loading branch information
aswaterman authored and ingallsj committed Dec 23, 2021
1 parent 1714680 commit ebe27d6
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 8 deletions.
26 changes: 20 additions & 6 deletions src/main/scala/rocket/PTW.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import scala.collection.mutable.ListBuffer

// Translation request sent from a TLB to the page-table walker (PTW).
class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
// Virtual page number to translate (vpnBits wide; page offset is not sent).
val addr = UInt(width = vpnBits)
// When set, the PTW must return the guest-physical address of the access in
// the response's `gpa` field (added for htval/mtval2 reporting). The walker
// asserts that need_gpa implies stage2 — TODO confirm against PTW.scala.
val need_gpa = Bool()
// Selects VS-stage (guest first-stage) translation for this walk —
// presumably from vsatp when virtualization is active; verify against TLB.scala.
val vstage1 = Bool()
// Selects G-stage (second-stage, hgatp-based) translation for this walk.
val stage2 = Bool()
}
Expand All @@ -34,6 +35,7 @@ class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
val level = UInt(width = log2Ceil(pgLevels))
val fragmented_superpage = Bool()
val homogeneous = Bool()
val gpa = Valid(UInt(vaddrBits.W))
}

class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p)
Expand Down Expand Up @@ -154,6 +156,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(

val aux_count = Reg(UInt(log2Ceil(pgLevels).W))
val aux_pte = Reg(new PTE)
val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case
val stage2 = Reg(Bool())
val stage2_final = Reg(Bool())

Expand Down Expand Up @@ -207,8 +210,8 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
(Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8))(vaddrBits - 1, 0)
}

val fragmented_superpage_ppn = {
val choices = (pgLevels-1 until 0 by -1).map(i => Cat(r_pte.ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
def makeFragmentedSuperpagePPN(ppn: UInt): UInt = {
val choices = (pgLevels-1 until 0 by -1).map(i => Cat(ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
choices(count)
}

Expand Down Expand Up @@ -312,7 +315,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
}

val s0_valid = !l2_refill && arb.io.out.fire()
val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2
val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa
val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid)
val s2_valid = RegNext(s1_valid)
val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits-1, 0), s0_valid)
Expand Down Expand Up @@ -374,7 +377,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
}
val pmaHomogeneous = pmaPgLevelHomogeneous(count)
val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(r_pte.ppn << pgIdxBits, count)
val homogeneous = pmaHomogeneous && pmpHomogeneous
val homogeneous = !r_req.need_gpa && pmaHomogeneous && pmpHomogeneous

for (i <- 0 until io.requestor.size) {
io.requestor(i).resp.valid := resp_valid(i)
Expand All @@ -388,6 +391,8 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
io.requestor(i).resp.bits.level := max_count
io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs
io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs
io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa
io.requestor(i).resp.bits.gpa.bits := Cat(aux_pte.ppn, gpa_pgoff)
io.requestor(i).ptbr := io.dpath.ptbr
io.requestor(i).hgatp := io.dpath.hgatp
io.requestor(i).vsatp := io.dpath.vsatp
Expand Down Expand Up @@ -425,9 +430,15 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
resp_hx := true
resp_fragmented_superpage := false
r_hgatp := io.dpath.hgatp

assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2)
}
}
is (s_req) {
when(stage2 && count === r_hgatp_initial_count) {
gpa_pgoff := Mux(aux_count === pgLevels-1, r_req.addr << (xLen/8).log2, stage2_pte_cache_addr)
}

when (stage2_pte_cache_hit) {
aux_count := aux_count + 1
aux_pte.ppn := stage2_pte_cache_data
Expand Down Expand Up @@ -458,6 +469,9 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
when (!homogeneous) {
count := pgLevels-1
resp_fragmented_superpage := true
when(!resp_gf) {
aux_pte.ppn := makeFragmentedSuperpagePPN(aux_pte.ppn)
}
}
}
}
Expand All @@ -475,7 +489,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
Mux(state === s_req && !stage2_pte_cache_hit && pte_cache_hit, makePTE(pte_cache_data, l2_pte),
Mux(do_switch, makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte),
Mux(mem_resp_valid, Mux(!traverse && (r_req.vstage1 && stage2), merged_pte, pte),
Mux(state === s_fragment_superpage && !homogeneous, makePTE(fragmented_superpage_ppn, r_pte),
Mux(state === s_fragment_superpage && !homogeneous, makePTE(makeFragmentedSuperpagePPN(r_pte.ppn), r_pte),
Mux(arb.io.out.fire(), Mux(arb.io.out.bits.bits.stage2, makeHypervisorRootPTE(io.dpath.hgatp, satp.ppn, r_pte), makePTE(satp.ppn, r_pte)),
r_pte)))))))

Expand Down Expand Up @@ -505,7 +519,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
}.otherwise {
val ae = pte.v && invalid_paddr
val success = pte.v && !ae && !gf
l2_refill := success && count === pgLevels-1 &&
l2_refill := success && count === pgLevels-1 && !r_req.need_gpa &&
(!r_req.vstage1 && !r_req.stage2 ||
do_both_stages && aux_count === pgLevels-1 && isFullPermPTE(pte))
count := max_count
Expand Down
37 changes: 35 additions & 2 deletions src/main/scala/rocket/TLB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
// lookup responses
val miss = Bool()
val paddr = UInt(width = paddrBits)
val gpa = UInt(vaddrBitsExtended.W)
val pf = new TLBExceptions
val gf = new TLBExceptions
val ae = new TLBExceptions
Expand Down Expand Up @@ -201,13 +202,18 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
val r_sectored_hit = Reg(Bool())
val r_vstage1_en = Reg(Bool())
val r_stage2_en = Reg(Bool())
val r_need_gpa = Reg(Bool())
val r_gpa_valid = Reg(Bool())
val r_gpa = Reg(UInt(vaddrBits.W))
val r_gpa_gf = Reg(Bool())

val priv = io.req.bits.prv
val priv_v = usingHypervisor && io.req.bits.v
val priv_s = priv(0)
val priv_uses_vm = priv <= PRV.S
val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr)
val stage1_en = Bool(usingVM) && satp.mode(satp.mode.getWidth-1)
val vstage1_en = priv_v && io.ptw.vsatp.mode(satp.mode.getWidth-1)
val stage2_en = Bool(usingHypervisor) && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth-1)
val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough

Expand Down Expand Up @@ -291,6 +297,10 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
when (invalidate_refill) { e.invalidate() }
}
}

r_gpa_valid := io.ptw.resp.bits.gpa.valid
r_gpa := io.ptw.resp.bits.gpa.bits
r_gpa_gf := io.ptw.resp.bits.gf
}

val entries = all_entries.map(_.getData(vpn))
Expand Down Expand Up @@ -380,7 +390,15 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
val gf_st_array = Mux(priv_v && cmd_write_perms, ~(hw_array | ptw_ae_array), 0.U)
val gf_inst_array = Mux(priv_v, ~(hx_array | ptw_ae_array), 0.U)

val tlb_hit = real_hits.orR
val gpa_hits = {
val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array
val hit_mask = Fill(sectored_entries.head.size, r_gpa_valid && r_refill_tag === vpn) | Fill(all_entries.size, !vstage1_en)
hit_mask | ~need_gpa_mask(all_entries.size-1, 0)
}

val tlb_hit_if_not_gpa_miss = real_hits.orR
val tlb_hit = (real_hits & gpa_hits).orR

val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit

val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries(0).size, "plru")
Expand Down Expand Up @@ -415,20 +433,35 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint)
io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits
io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
io.resp.gpa := {
val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits)
val offset = Mux(!vstage1_en || !r_gpa_gf, io.req.bits.vaddr(pgIdxBits-1, 0), r_gpa(pgIdxBits-1, 0))
Cat(page, offset)
}

io.ptw.req.valid := state === s_request
io.ptw.req.bits.valid := !io.kill
io.ptw.req.bits.bits.addr := r_refill_tag
io.ptw.req.bits.bits.vstage1 := r_vstage1_en
io.ptw.req.bits.bits.stage2 := r_stage2_en
io.ptw.req.bits.bits.need_gpa := r_need_gpa

if (usingVM) {
val sfence = io.sfence.valid
when (io.req.fire() && tlb_miss) {
state := s_request
r_refill_tag := vpn
r_gpa_valid := false
r_need_gpa := tlb_hit_if_not_gpa_miss

when(tlb_hit_if_not_gpa_miss) {
// the GPA will come back as a fragmented superpage entry, so zap
// superpage hit to prevent a future multi-hit
for ((e, h) <- superpage_entries.zip(superpage_hits))
when(h) { e.invalidate() }
}

r_vstage1_en := priv_v && stage1_en
r_vstage1_en := vstage1_en
r_stage2_en := stage2_en
r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way)
r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
Expand Down

0 comments on commit ebe27d6

Please sign in to comment.