From ebe27d681043f6df9393d9ff816db54cd406330a Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Fri, 15 Oct 2021 01:52:39 -0700
Subject: [PATCH] TLB/PTW: add htval/mtval2 support

---
 src/main/scala/rocket/PTW.scala | 26 +++++++++++++++++------
 src/main/scala/rocket/TLB.scala | 37 +++++++++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala
index adb07095ac0..6ad4232ddd0 100644
--- a/src/main/scala/rocket/PTW.scala
+++ b/src/main/scala/rocket/PTW.scala
@@ -19,6 +19,7 @@ import scala.collection.mutable.ListBuffer
 
 class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
   val addr = UInt(width = vpnBits)
+  val need_gpa = Bool()
   val vstage1 = Bool()
   val stage2 = Bool()
 }
@@ -34,6 +35,7 @@ class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
   val level = UInt(width = log2Ceil(pgLevels))
   val fragmented_superpage = Bool()
   val homogeneous = Bool()
+  val gpa = Valid(UInt(vaddrBits.W))
 }
 
 class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p)
@@ -154,6 +156,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
 
   val aux_count = Reg(UInt(log2Ceil(pgLevels).W))
   val aux_pte = Reg(new PTE)
+  val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case
   val stage2 = Reg(Bool())
   val stage2_final = Reg(Bool())
 
@@ -207,8 +210,8 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
     (Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8))(vaddrBits - 1, 0)
   }
 
-  val fragmented_superpage_ppn = {
-    val choices = (pgLevels-1 until 0 by -1).map(i => Cat(r_pte.ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
+  def makeFragmentedSuperpagePPN(ppn: UInt): UInt = {
+    val choices = (pgLevels-1 until 0 by -1).map(i => Cat(ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
     choices(count)
   }
 
@@ -312,7 +315,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
   }
 
   val s0_valid = !l2_refill && arb.io.out.fire()
-  val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2
+  val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa
   val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid)
   val s2_valid = RegNext(s1_valid)
   val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits-1, 0), s0_valid)
@@ -374,7 +377,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
   }
   val pmaHomogeneous = pmaPgLevelHomogeneous(count)
   val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(r_pte.ppn << pgIdxBits, count)
-  val homogeneous = pmaHomogeneous && pmpHomogeneous
+  val homogeneous = !r_req.need_gpa && pmaHomogeneous && pmpHomogeneous
 
   for (i <- 0 until io.requestor.size) {
     io.requestor(i).resp.valid := resp_valid(i)
@@ -388,6 +391,8 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
     io.requestor(i).resp.bits.level := max_count
     io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs
     io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs
+    io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa
+    io.requestor(i).resp.bits.gpa.bits := Cat(aux_pte.ppn, gpa_pgoff)
     io.requestor(i).ptbr := io.dpath.ptbr
     io.requestor(i).hgatp := io.dpath.hgatp
     io.requestor(i).vsatp := io.dpath.vsatp
@@ -425,9 +430,15 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
         resp_hx := true
         resp_fragmented_superpage := false
         r_hgatp := io.dpath.hgatp
+
+        assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2)
       }
     }
     is (s_req) {
+      when(stage2 && count === r_hgatp_initial_count) {
+        gpa_pgoff := Mux(aux_count === pgLevels-1, r_req.addr << (xLen/8).log2, stage2_pte_cache_addr)
+      }
+
       when (stage2_pte_cache_hit) {
         aux_count := aux_count + 1
         aux_pte.ppn := stage2_pte_cache_data
@@ -458,6 +469,9 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
       when (!homogeneous) {
         count := pgLevels-1
         resp_fragmented_superpage := true
+        when(!resp_gf) {
+          aux_pte.ppn := makeFragmentedSuperpagePPN(aux_pte.ppn)
+        }
       }
     }
   }
@@ -475,7 +489,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
     Mux(state === s_req && !stage2_pte_cache_hit && pte_cache_hit, makePTE(pte_cache_data, l2_pte),
     Mux(do_switch, makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte),
     Mux(mem_resp_valid, Mux(!traverse && (r_req.vstage1 && stage2), merged_pte, pte),
-    Mux(state === s_fragment_superpage && !homogeneous, makePTE(fragmented_superpage_ppn, r_pte),
+    Mux(state === s_fragment_superpage && !homogeneous, makePTE(makeFragmentedSuperpagePPN(r_pte.ppn), r_pte),
     Mux(arb.io.out.fire(), Mux(arb.io.out.bits.bits.stage2, makeHypervisorRootPTE(io.dpath.hgatp, satp.ppn, r_pte), makePTE(satp.ppn, r_pte)),
     r_pte)))))))
 
@@ -505,7 +519,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(
   }.otherwise {
     val ae = pte.v && invalid_paddr
     val success = pte.v && !ae && !gf
-    l2_refill := success && count === pgLevels-1 &&
+    l2_refill := success && count === pgLevels-1 && !r_req.need_gpa &&
       (!r_req.vstage1 && !r_req.stage2 ||
        do_both_stages && aux_count === pgLevels-1 && isFullPermPTE(pte))
     count := max_count
diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala
index 07be4a5a229..b1d4fdf834d 100644
--- a/src/main/scala/rocket/TLB.scala
+++ b/src/main/scala/rocket/TLB.scala
@@ -52,6 +52,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
   // lookup responses
   val miss = Bool()
   val paddr = UInt(width = paddrBits)
+  val gpa = UInt(vaddrBitsExtended.W)
   val pf = new TLBExceptions
   val gf = new TLBExceptions
   val ae = new TLBExceptions
@@ -201,6 +202,10 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
   val r_sectored_hit = Reg(Bool())
   val r_vstage1_en = Reg(Bool())
   val r_stage2_en = Reg(Bool())
+  val r_need_gpa = Reg(Bool())
+  val r_gpa_valid = Reg(Bool())
+  val r_gpa = Reg(UInt(vaddrBits.W))
+  val r_gpa_gf = Reg(Bool())
 
   val priv = io.req.bits.prv
   val priv_v = usingHypervisor && io.req.bits.v
@@ -208,6 +213,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
   val priv_uses_vm = priv <= PRV.S
   val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr)
   val stage1_en = Bool(usingVM) && satp.mode(satp.mode.getWidth-1)
+  val vstage1_en = priv_v && io.ptw.vsatp.mode(satp.mode.getWidth-1)
   val stage2_en = Bool(usingHypervisor) && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth-1)
   val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough
 
@@ -291,6 +297,10 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
         when (invalidate_refill) { e.invalidate() }
       }
     }
+
+    r_gpa_valid := io.ptw.resp.bits.gpa.valid
+    r_gpa := io.ptw.resp.bits.gpa.bits
+    r_gpa_gf := io.ptw.resp.bits.gf
   }
 
   val entries = all_entries.map(_.getData(vpn))
@@ -380,7 +390,15 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
   val gf_st_array = Mux(priv_v && cmd_write_perms, ~(hw_array | ptw_ae_array), 0.U)
   val gf_inst_array = Mux(priv_v, ~(hx_array | ptw_ae_array), 0.U)
 
-  val tlb_hit = real_hits.orR
+  val gpa_hits = {
+    val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array
+    val hit_mask = Fill(sectored_entries.head.size, r_gpa_valid && r_refill_tag === vpn) | Fill(all_entries.size, !vstage1_en)
+    hit_mask | ~need_gpa_mask(all_entries.size-1, 0)
+  }
+
+  val tlb_hit_if_not_gpa_miss = real_hits.orR
+  val tlb_hit = (real_hits & gpa_hits).orR
+
   val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit
 
   val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries(0).size, "plru")
@@ -415,20 +433,35 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T
   io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint)
   io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits
   io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
+  io.resp.gpa := {
+    val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits)
+    val offset = Mux(!vstage1_en || !r_gpa_gf, io.req.bits.vaddr(pgIdxBits-1, 0), r_gpa(pgIdxBits-1, 0))
+    Cat(page, offset)
+  }
 
   io.ptw.req.valid := state === s_request
   io.ptw.req.bits.valid := !io.kill
   io.ptw.req.bits.bits.addr := r_refill_tag
   io.ptw.req.bits.bits.vstage1 := r_vstage1_en
   io.ptw.req.bits.bits.stage2 := r_stage2_en
+  io.ptw.req.bits.bits.need_gpa := r_need_gpa
 
   if (usingVM) {
     val sfence = io.sfence.valid
     when (io.req.fire() && tlb_miss) {
       state := s_request
       r_refill_tag := vpn
+      r_gpa_valid := false
+      r_need_gpa := tlb_hit_if_not_gpa_miss
+
+      when(tlb_hit_if_not_gpa_miss) {
+        // the GPA will come back as a fragmented superpage entry, so zap
+        // superpage hit to prevent a future multi-hit
+        for ((e, h) <- superpage_entries.zip(superpage_hits))
+          when(h) { e.invalidate() }
+      }
 
-      r_vstage1_en := priv_v && stage1_en
+      r_vstage1_en := vstage1_en
       r_stage2_en := stage2_en
       r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way)
       r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
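
Note (commentary, not part of the applied patch): this change plumbs the guest
physical address (GPA) of a faulting access from the PTW through the new
TLBResp.gpa field so the core can report it on a guest-page fault. The RISC-V
hypervisor extension specifies that htval (traps into HS-mode) and mtval2
(traps into M-mode) hold that GPA shifted right by 2 bits. As a rough
illustration only, a downstream consumer of TLBResp.gpa might look like the
sketch below; it is written in plain Chisel3 syntax, and the module name
GpaCapture and the gf_valid/htval port names are hypothetical, not names from
this diff or from rocket-chip's actual CSR file.

  import chisel3._

  // Sketch: latch the TLB-reported GPA when a guest-page fault is taken,
  // in the right-shifted-by-2 format htval/mtval2 are specified to hold.
  // Assumes vaddrBitsExtended - 2 <= xLen, so the shifted value fits.
  class GpaCapture(vaddrBitsExtended: Int, xLen: Int) extends Module {
    val io = IO(new Bundle {
      val gf_valid = Input(Bool())                    // guest-page fault taken this cycle
      val gpa      = Input(UInt(vaddrBitsExtended.W)) // from TLBResp.gpa
      val htval    = Output(UInt(xLen.W))             // value to expose as htval/mtval2
    })
    val reg = RegInit(0.U(xLen.W))
    when (io.gf_valid) { reg := io.gpa >> 2 }         // spec: CSR holds GPA >> 2
    io.htval := reg
  }

How the rocket-chip CSR file actually consumes the gpa value is outside the
scope of this diff.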