[MIPS] Speed up software refill handler

Emulating each instruction of the software refill handler
has a significant impact on the overall performance of QEMU because
of the overhead of emulating the various CP0 instructions to accurately
reflect the machine state. Running the software TLB handler takes
the equivalent of thousands of machine cycles.

This patch implements a pseudo hardware TLB refill handler
that significantly reduces the impact of refilling the TLB
to bring it more in line with what would be observed on a real target.

Signed-off-by: Steven Hill <sjhill@mips.com>
Signed-off-by: Chris Dearman <chris@mips.com>
Signed-off-by: Yajin <yajin@mips.com.cn>
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 7c04fbe..46519e3 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -39,7 +39,6 @@
 typedef struct CPUMIPSTLBContext CPUMIPSTLBContext;
 struct CPUMIPSTLBContext {
     uint32_t nb_tlb;
-    uint32_t tlb_in_use;
     int (*map_address) (struct CPUMIPSState *env, target_phys_addr_t *physical, int *prot, target_ulong address, int rw, int access_type);
     void (*helper_tlbwi) (void);
     void (*helper_tlbwr) (void);
@@ -630,7 +629,6 @@
                                int mmu_idx, int is_softmmu);
 #define cpu_handle_mmu_fault cpu_mips_handle_mmu_fault
 void do_interrupt (CPUState *env);
-void r4k_invalidate_tlb (CPUState *env, int idx, int use_extra);
 target_phys_addr_t cpu_mips_translate_address (CPUState *env, target_ulong address,
 		                               int rw);
 
diff --git a/target-mips/helper.c b/target-mips/helper.c
index 12caf34..838ccbb 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -66,14 +66,20 @@
                      target_ulong address, int rw, int access_type)
 {
     uint8_t ASID = env->CP0_EntryHi & 0xFF;
+    r4k_tlb_t *tlb;
+    target_ulong mask;
+    target_ulong tag;
+    target_ulong VPN;
+    int n;
     int i;
 
-    for (i = 0; i < env->tlb->tlb_in_use; i++) {
-        r4k_tlb_t *tlb = &env->tlb->mmu.r4k.tlb[i];
+    for (i = 0; i < env->tlb->nb_tlb; i++) {
+        tlb = &env->tlb->mmu.r4k.tlb[i];
         /* 1k pages are not supported. */
-        target_ulong mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
-        target_ulong tag = address & ~mask;
-        target_ulong VPN = tlb->VPN & ~mask;
+        mask = ~(TARGET_PAGE_MASK << 1);
+        tag = address & ~mask;
+        VPN = tlb->VPN & ~mask;
+
 #if defined(TARGET_MIPS64)
         tag &= env->SEGMask;
 #endif
@@ -81,7 +87,7 @@
         /* Check ASID, virtual page number & size */
         if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
             /* TLB match */
-            int n = !!(address & mask & ~(mask >> 1));
+            n = !!(address & mask & ~(mask >> 1));
             /* Check access rights */
             if (!(n ? tlb->V1 : tlb->V0))
                 return TLBRET_INVALID;
@@ -120,7 +126,7 @@
 
     if (address <= (int32_t)0x7FFFFFFFUL) {
         /* useg */
-        if (env->CP0_Status & (1 << CP0St_ERL)) {
+        if (unlikely(env->CP0_Status & (1 << CP0St_ERL))) {
             *physical = address & 0xFFFFFFFF;
             *prot = PAGE_READ | PAGE_WRITE;
         } else {
@@ -253,18 +259,140 @@
     env->error_code = error_code;
 }
 
-target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+/*
+ * Get the pgd_current pointer from the TLB refill exception handler.
+ * The exception handler is generated by build_r4000_tlb_refill_handler.
+ * 0x80000000:0x3c1b8033: lui k1,0x8033
+ * 0x80000004:0x401a4000: mfc0 k0,c0_badvaddr
+ * 0x80000008:0x8f7bb000: lw  k1,-20480(k1)
+ *
+ */
+static inline target_ulong cpu_mips_get_pgd(CPUState *env)
 {
-#if defined(CONFIG_USER_ONLY)
-    return addr;
-#else
-    target_phys_addr_t phys_addr;
-    int prot;
+    static target_ulong pgd_current_p = 0;
+    static target_ulong probed = 0;
 
-    if (get_physical_address(env, &phys_addr, &prot, addr, 0, ACCESS_INT) != 0)
-        return -1;
-    return phys_addr;
-#endif
+    if (likely(pgd_current_p)) {
+        /* Get pgd_current */
+        return ldl_phys(pgd_current_p);
+    }
+    else if (unlikely(!probed)) {
+        uint32_t ins1, ins2;
+	uint32_t address;
+        uint32_t ebase;
+
+	probed = 1;
+
+	ebase = env->CP0_EBase - 0x80000000;
+
+        /* Get pgd_current pointer from TLB refill exception handler */
+        ins1 = ldl_phys(ebase);        /* lui k1,%hi(pgd_current_p) */
+        ins2 = ldl_phys(ebase + 8);    /* lw  k1,%lo(pgd_current_p)(k1) */
+
+        address = ((ins1 & 0xffff)<<16);
+        address += (((int32_t)(ins2 & 0xffff))<<16)>>16;
+	/* assumes pgd_current_p != 0 */
+	if (address > 0x80000000 && address < 0xa0000000) {
+            pgd_current_p = address -= 0x80000000;
+	    return ldl_phys(pgd_current_p);
+	}
+    }
+    return 0;
+}
+
+static inline int cpu_mips_tlb_refill(CPUState *env, target_ulong address, int rw ,
+                                      int mmu_idx, int is_softmmu)
+{
+    int32_t saved_hflags;
+    target_ulong saved_badvaddr,saved_entryhi,saved_context;
+
+    target_ulong pgd_addr,pt_addr,index;
+    target_ulong fault_addr,ptw_phys;
+    target_ulong elo_even,elo_odd;
+    uint32_t page_valid;
+    int ret;
+
+    saved_badvaddr = env->CP0_BadVAddr;
+    saved_context = env->CP0_Context;
+    saved_entryhi = env->CP0_EntryHi;
+    saved_hflags = env->hflags;
+
+    env->CP0_BadVAddr = address;
+    env->CP0_Context = (env->CP0_Context & ~0x007fffff) |
+                    ((address >> 9) &   0x007ffff0);
+    env->CP0_EntryHi =
+        (env->CP0_EntryHi & 0xFF) | (address & (TARGET_PAGE_MASK << 1));
+
+    env->hflags = MIPS_HFLAG_KM;
+
+    fault_addr = env->CP0_BadVAddr;
+    page_valid = 0;
+
+    pgd_addr = cpu_mips_get_pgd(env);
+    if (unlikely(!pgd_addr))
+    {
+        /* Not a valid pgd_addr; just return. */
+        //return TLBRET_NOMATCH;
+        ret = TLBRET_NOMATCH;
+        goto out;
+    }
+
+    ptw_phys = pgd_addr - (int32_t)0x80000000UL;
+    index = (fault_addr>>22)<<2;
+    ptw_phys += index;
+
+    pt_addr = ldl_phys(ptw_phys);
+
+    ptw_phys = pt_addr - (int32_t)0x80000000UL;
+    index = (env->CP0_Context>>1)&0xff8;
+    ptw_phys += index;
+
+    /* Get the page table entry. */
+    elo_even = ldl_phys(ptw_phys);
+    elo_odd  = ldl_phys(ptw_phys+4);
+    elo_even = elo_even >> 6;
+    elo_odd = elo_odd >> 6;
+    env->CP0_EntryLo0 = elo_even;
+    env->CP0_EntryLo1 = elo_odd;
+    /* Done. Refill the TLB. */
+    r4k_helper_ptw_tlbrefill(env);
+
+    /* Since we know the value of TLB entry, we can
+     * return the TLB lookup value here.
+     */
+
+    env->hflags = saved_hflags;
+
+    target_ulong mask = env->CP0_PageMask | ~(TARGET_PAGE_MASK << 1);
+    int n = !!(address & mask & ~(mask >> 1));
+    /* Check access rights */
+    if (!(n ? (elo_odd & 2) != 0 : (elo_even & 2) != 0))
+    {
+        ret = TLBRET_INVALID;
+        goto out;
+    }
+
+    if (rw == 0 || (n ? (elo_odd & 4) != 0 : (elo_even & 4) != 0)) {
+        target_ulong physical = (n?(elo_odd >> 6) << 12 : (elo_even >> 6) << 12);
+        physical |= (address & (mask >> 1));
+        int prot = PAGE_READ;
+        if (n ? (elo_odd & 4) != 0 : (elo_even & 4) != 0)
+            prot |= PAGE_WRITE;
+
+        tlb_set_page(env, address & TARGET_PAGE_MASK,
+                        physical & TARGET_PAGE_MASK, prot,
+                        mmu_idx, is_softmmu);
+        ret = TLBRET_MATCH;
+        goto out;
+    }
+    ret = TLBRET_DIRTY;
+
+out:
+    env->CP0_BadVAddr = saved_badvaddr;
+    env->CP0_Context = saved_context;
+    env->CP0_EntryHi = saved_entryhi;
+    env->hflags = saved_hflags;
+    return ret;
 }
 
 int cpu_mips_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
@@ -274,6 +402,7 @@
     target_phys_addr_t physical;
     int prot;
 #endif
+    int exception = 0, error_code = 0;
     int access_type;
     int ret = 0;
 
@@ -300,7 +429,10 @@
        ret = tlb_set_page(env, address & TARGET_PAGE_MASK,
                           physical & TARGET_PAGE_MASK, prot,
                           mmu_idx, is_softmmu);
-    } else if (ret < 0)
+    }
+    else if (ret == TLBRET_NOMATCH)
+        ret = cpu_mips_tlb_refill(env,address,rw,mmu_idx,is_softmmu);
+    if (ret < 0)
 #endif
     {
         raise_mmu_exception(env, address, rw, ret);
@@ -333,6 +465,48 @@
 }
 #endif
 
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+#if defined(CONFIG_USER_ONLY)
+    return addr;
+#else
+    target_phys_addr_t phys_addr;
+    int prot, ret;
+
+    ret = get_physical_address(env, &phys_addr, &prot, addr, 0, ACCESS_INT);
+    if (ret != TLBRET_MATCH && ret != TLBRET_DIRTY) {
+	    target_ulong pgd_addr = cpu_mips_get_pgd(env);
+	    if (unlikely(!pgd_addr)) {
+		    phys_addr = -1;
+	    }
+	    else {
+		    target_ulong pgd_phys, pgd_index;
+		    target_ulong pt_addr, pt_phys, pt_index;
+		    target_ulong lo;
+		    /* Mimic the steps taken for a TLB refill */
+		    pgd_phys = pgd_addr - (int32_t)0x80000000UL;
+		    pgd_index = (addr >> 22) << 2;
+		    pt_addr = ldl_phys(pgd_phys + pgd_index);
+		    pt_phys = pt_addr - (int32_t)0x80000000UL;
+		    pt_index = (((addr >> 9) & 0x007ffff0) >> 1) & 0xff8;
+		    /* get the entrylo value */
+		    if (addr & 0x1000)
+			    lo = ldl_phys(pt_phys + pt_index + 4);
+		    else
+			    lo = ldl_phys(pt_phys + pt_index);
+		    /* convert software TLB entry to hardware value */
+		    lo >>= 6;
+		    if (lo & 0x00000002)
+			    /* It is valid */
+			    phys_addr = (lo >> 6) << 12;
+		    else
+			    phys_addr = -1;
+	    }
+    }
+    return phys_addr;
+#endif
+}
+
 static const char * const excp_names[EXCP_LAST + 1] = {
     [EXCP_RESET] = "reset",
     [EXCP_SRESET] = "soft reset",
@@ -592,7 +766,7 @@
     env->exception_index = EXCP_NONE;
 }
 
-void r4k_invalidate_tlb (CPUState *env, int idx, int use_extra)
+void r4k_invalidate_tlb (CPUState *env, int idx)
 {
     r4k_tlb_t *tlb;
     target_ulong addr;
@@ -607,15 +781,6 @@
         return;
     }
 
-    if (use_extra && env->tlb->tlb_in_use < MIPS_TLB_MAX) {
-        /* For tlbwr, we can shadow the discarded entry into
-           a new (fake) TLB entry, as long as the guest can not
-           tell that it's there.  */
-        env->tlb->mmu.r4k.tlb[env->tlb->tlb_in_use] = *tlb;
-        env->tlb->tlb_in_use++;
-        return;
-    }
-
     /* 1k pages are not supported. */
     mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
     if (tlb->V0) {
diff --git a/target-mips/machine.c b/target-mips/machine.c
index 9ffac71..04985b1 100644
--- a/target-mips/machine.c
+++ b/target-mips/machine.c
@@ -58,7 +58,6 @@
 
     /* Save TLB */
     qemu_put_be32s(f, &env->tlb->nb_tlb);
-    qemu_put_be32s(f, &env->tlb->tlb_in_use);
     for(i = 0; i < MIPS_TLB_MAX; i++) {
         uint16_t flags = ((env->tlb->mmu.r4k.tlb[i].G << 10) |
                           (env->tlb->mmu.r4k.tlb[i].C0 << 7) |
@@ -209,7 +208,6 @@
 
     /* Load TLB */
     qemu_get_be32s(f, &env->tlb->nb_tlb);
-    qemu_get_be32s(f, &env->tlb->tlb_in_use);
     for(i = 0; i < MIPS_TLB_MAX; i++) {
         uint16_t flags;
         uint8_t asid;
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index a0c64f4..783a1a9 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1498,20 +1498,69 @@
 }
 
 #ifndef CONFIG_USER_ONLY
+static void inline r4k_invalidate_tlb_shadow (CPUState *env, int idx)
+{
+    r4k_tlb_t *tlb;
+    uint8_t ASID = env->CP0_EntryHi & 0xFF;
+
+    tlb = &env->tlb->mmu.r4k.tlb[idx];
+    /* The qemu TLB is flushed when the ASID changes, so no need to
+    flush these entries again.  */
+    if (tlb->G == 0 && tlb->ASID != ASID) {
+        return;
+    }
+}
+
+static void inline r4k_invalidate_tlb (CPUState *env, int idx)
+{
+    r4k_tlb_t *tlb;
+    target_ulong addr;
+    target_ulong end;
+    uint8_t ASID = env->CP0_EntryHi & 0xFF;
+    target_ulong mask;
+
+    tlb = &env->tlb->mmu.r4k.tlb[idx];
+    /* The qemu TLB is flushed when the ASID changes, so no need to
+    flush these entries again.  */
+    if (tlb->G == 0 && tlb->ASID != ASID) {
+        return;
+    }
+
+    /* 1k pages are not supported. */
+    mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
+    if (tlb->V0) {
+        addr = tlb->VPN & ~mask;
+#if defined(TARGET_MIPS64)
+        if (addr >= (0xFFFFFFFF80000000ULL & env->SEGMask)) {
+            addr |= 0x3FFFFF0000000000ULL;
+        }
+#endif
+        end = addr | (mask >> 1);
+        while (addr < end) {
+            tlb_flush_page (env, addr);
+            addr += TARGET_PAGE_SIZE;
+        }
+    }
+    if (tlb->V1) {
+        addr = (tlb->VPN & ~mask) | ((mask >> 1) + 1);
+#if defined(TARGET_MIPS64)
+        if (addr >= (0xFFFFFFFF80000000ULL & env->SEGMask)) {
+            addr |= 0x3FFFFF0000000000ULL;
+        }
+#endif
+        end = addr | mask;
+        while (addr - 1 < end) {
+            tlb_flush_page (env, addr);
+            addr += TARGET_PAGE_SIZE;
+        }
+    }
+}
+
 /* TLB management */
 void cpu_mips_tlb_flush (CPUState *env, int flush_global)
 {
     /* Flush qemu's TLB and discard all shadowed entries.  */
     tlb_flush (env, flush_global);
-    env->tlb->tlb_in_use = env->tlb->nb_tlb;
-}
-
-static void r4k_mips_tlb_flush_extra (CPUState *env, int first)
-{
-    /* Discard entries from env->tlb[first] onwards.  */
-    while (env->tlb->tlb_in_use > first) {
-        r4k_invalidate_tlb(env, --env->tlb->tlb_in_use, 0);
-    }
 }
 
 static void r4k_fill_tlb (int idx)
@@ -1537,26 +1586,65 @@
     tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
 }
 
+void r4k_helper_ptw_tlbrefill(CPUState *target_env)
+{
+   CPUState *saved_env;
+
+   /* Save current 'env' value */
+   saved_env = env;
+   env = target_env;
+
+   /* Do TLB load on behalf of Page Table Walk */
+    int r = cpu_mips_get_random(env);
+    r4k_invalidate_tlb_shadow(env, r);
+    r4k_fill_tlb(r);
+
+   /* Restore 'env' value */
+   env = saved_env;
+}
+
 void r4k_helper_tlbwi (void)
 {
-    int idx;
+    r4k_tlb_t *tlb;
+    target_ulong tag;
+    target_ulong VPN;
+    target_ulong mask;
 
-    idx = (env->CP0_Index & ~0x80000000) % env->tlb->nb_tlb;
+    /* If tlbwi is trying to upgrading access permissions on current entry,
+     * we do not need to flush tlb hash table.
+     */
+    tlb = &env->tlb->mmu.r4k.tlb[env->CP0_Index % env->tlb->nb_tlb];
+    mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
+    tag = env->CP0_EntryHi & ~mask;
+    VPN = tlb->VPN & ~mask;
+    if (VPN == tag)
+    {
+        if (tlb->ASID == (env->CP0_EntryHi & 0xFF))
+        {
+            tlb->V0 = (env->CP0_EntryLo0 & 2) != 0;
+            tlb->D0 = (env->CP0_EntryLo0 & 4) != 0;
+            tlb->C0 = (env->CP0_EntryLo0 >> 3) & 0x7;
+            tlb->PFN[0] = (env->CP0_EntryLo0 >> 6) << 12;
+            tlb->V1 = (env->CP0_EntryLo1 & 2) != 0;
+            tlb->D1 = (env->CP0_EntryLo1 & 4) != 0;
+            tlb->C1 = (env->CP0_EntryLo1 >> 3) & 0x7;
+            tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
+            return;
+        }
+    }
 
-    /* Discard cached TLB entries.  We could avoid doing this if the
-       tlbwi is just upgrading access permissions on the current entry;
-       that might be a further win.  */
-    r4k_mips_tlb_flush_extra (env, env->tlb->nb_tlb);
+    /*flush all the tlb cache */
+    cpu_mips_tlb_flush (env, 1);
 
-    r4k_invalidate_tlb(env, idx, 0);
-    r4k_fill_tlb(idx);
+    r4k_invalidate_tlb(env, env->CP0_Index % env->tlb->nb_tlb);
+    r4k_fill_tlb(env->CP0_Index % env->tlb->nb_tlb);
 }
 
 void r4k_helper_tlbwr (void)
 {
     int r = cpu_mips_get_random(env);
 
-    r4k_invalidate_tlb(env, r, 1);
+    r4k_invalidate_tlb_shadow(env, r);
     r4k_fill_tlb(r);
 }
 
@@ -1568,6 +1656,8 @@
     target_ulong VPN;
     uint8_t ASID;
     int i;
+    target_ulong addr;
+    target_ulong end;
 
     ASID = env->CP0_EntryHi & 0xFF;
     for (i = 0; i < env->tlb->nb_tlb; i++) {
@@ -1577,27 +1667,16 @@
         tag = env->CP0_EntryHi & ~mask;
         VPN = tlb->VPN & ~mask;
         /* Check ASID, virtual page number & size */
-        if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
+        if (unlikely((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag)) {
             /* TLB match */
             env->CP0_Index = i;
             break;
         }
     }
     if (i == env->tlb->nb_tlb) {
-        /* No match.  Discard any shadow entries, if any of them match.  */
-        for (i = env->tlb->nb_tlb; i < env->tlb->tlb_in_use; i++) {
-            tlb = &env->tlb->mmu.r4k.tlb[i];
-            /* 1k pages are not supported. */
-            mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
-            tag = env->CP0_EntryHi & ~mask;
-            VPN = tlb->VPN & ~mask;
-            /* Check ASID, virtual page number & size */
-            if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
-                r4k_mips_tlb_flush_extra (env, i);
-                break;
-            }
-        }
-
+        /* No match.  Discard any shadow entries, if any of them match. */
+        int index = ((env->CP0_EntryHi>>5)&0x1ff00) | ASID;
+        index |= (env->CP0_EntryHi>>13)&0x20000;
         env->CP0_Index |= 0x80000000;
     }
 }
@@ -1606,17 +1685,16 @@
 {
     r4k_tlb_t *tlb;
     uint8_t ASID;
-    int idx;
 
     ASID = env->CP0_EntryHi & 0xFF;
-    idx = (env->CP0_Index & ~0x80000000) % env->tlb->nb_tlb;
-    tlb = &env->tlb->mmu.r4k.tlb[idx];
+    tlb = &env->tlb->mmu.r4k.tlb[env->CP0_Index % env->tlb->nb_tlb];
 
     /* If this will change the current ASID, flush qemu's TLB.  */
     if (ASID != tlb->ASID)
         cpu_mips_tlb_flush (env, 1);
 
-    r4k_mips_tlb_flush_extra(env, env->tlb->nb_tlb);
+    /*flush all the tlb cache */
+    cpu_mips_tlb_flush (env, 1);
 
     env->CP0_EntryHi = tlb->VPN | tlb->ASID;
     env->CP0_PageMask = tlb->PageMask;
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 853aafd..27a8df7 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -8675,7 +8675,6 @@
     }
     env->active_tc.PC = (int32_t)0xBFC00000;
     env->CP0_Random = env->tlb->nb_tlb - 1;
-    env->tlb->tlb_in_use = env->tlb->nb_tlb;
     env->CP0_Wired = 0;
     /* SMP not implemented */
     env->CP0_EBase = 0x80000000;