From 41cada4d2499705b321ab650891e76088d330a37 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 16 Jan 2018 19:20:26 +0300 Subject: [PATCH] ARC: ARCv2: Cache: Fixed operation without IOC Previous SLC management implementation is broken. Seems like it was never sufficiently tested probably because most of the time IOC was used instead (i.e. no manual cache operations were done). Now if we disable IOC in U-boot we'll get a lot of errors while using DMA-enabled peripherals. This time we fix it by substitution of broken per-line SLC operations region operations as it is done in the Linux kernel (we took it from v4.14 which is the latest stable as of today). Among other things this implementation might be a bit faster because instead of iteration over each and every cache line we're taking care about entire region in one go. Main changes: * Replaced __slc_line_op (per line operations) by __slc_rgn_op (region operations). * Reworked __slc_entire_op to get rid of __after_slc_op and __before_slc_op functions. Note flush fix (flush only instead of flush-n-inv when OP_FLUSH is used, see [1] for more details) is already incorporated here. * Added SLC invalidation to invalidate_icache_all(). * Added (start >= end) check to invalidate_dcache_range() and flush_dcache_range() as some buggy drivers pass region start == end. * Added read-out of MMU BCR so we may know if PAE40 exists in HW and then act on a particular AUX regs accordingly. [1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-January/003357.html Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/arcregs.h | 7 ++ arch/arc/lib/cache.c | 185 +++++++++++++++++++++------------ 2 files changed, 123 insertions(+), 69 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index ba1f7bac77..67f416305d 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -63,8 +63,15 @@ #define ARC_AUX_SLC_INVALIDATE 0x905 #define ARC_AUX_SLC_IVDL 0x910 #define ARC_AUX_SLC_FLDL 0x912 +#define ARC_AUX_SLC_RGN_START 0x914 +#define ARC_AUX_SLC_RGN_START1 0x915 +#define ARC_AUX_SLC_RGN_END 0x916 +#define ARC_AUX_SLC_RGN_END1 0x917 #define ARC_BCR_CLUSTER 0xcf +/* MMU Management regs */ +#define ARC_AUX_MMU_BCR 0x06f + /* IO coherency related auxiliary registers */ #define ARC_AUX_IO_COH_ENABLE 0x500 #define ARC_AUX_IO_COH_PARTIAL 0x501 diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 1073e1570f..a6bbe3ce5d 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -20,12 +20,17 @@ #define DC_CTRL_INV_MODE_FLUSH (1 << 6) #define DC_CTRL_FLUSH_STATUS (1 << 8) #define CACHE_VER_NUM_MASK 0xF -#define SLC_CTRL_SB (1 << 2) #define OP_INV 0x1 #define OP_FLUSH 0x2 #define OP_INV_IC 0x3 +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_DIS 0x001 +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 + /* * By default that variable will fall into .bss section. * But .bss section is not relocated and so it will be initilized before @@ -41,88 +46,115 @@ bool icache_exists __section(".data") = false; int slc_line_sz __section(".data"); bool slc_exists __section(".data") = false; bool ioc_exists __section(".data") = false; +bool pae_exists __section(".data") = false; -static unsigned int __before_slc_op(const int op) +void read_decode_mmu_bcr(void) { - unsigned int reg = reg; + /* TODO: should we compare mmu version from BCR and from CONFIG? */ +#if (CONFIG_ARC_MMU_VER >= 4) + u32 tmp; - if (op == OP_INV) { - /* - * IM is set by default and implies Flush-n-inv - * Clear it here for vanilla inv - */ - reg = read_aux_reg(ARC_AUX_SLC_CTRL); - write_aux_reg(ARC_AUX_SLC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); - } + tmp = read_aux_reg(ARC_AUX_MMU_BCR); - return reg; -} + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif /* CONFIG_CPU_BIG_ENDIAN */ + } *mmu4; -static void __after_slc_op(const int op, unsigned int reg) -{ - if (op & OP_FLUSH) { /* flush / flush-n-inv both wait */ - /* - * Make sure "busy" bit reports correct status, - * see STAR 9001165532 - */ - read_aux_reg(ARC_AUX_SLC_CTRL); - while (read_aux_reg(ARC_AUX_SLC_CTRL) & - DC_CTRL_FLUSH_STATUS) - ; - } + mmu4 = (struct bcr_mmu_4 *)&tmp; - /* Switch back to default Invalidate mode */ - if (op == OP_INV) - write_aux_reg(ARC_AUX_SLC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH); + pae_exists = !!mmu4->pae; +#endif /* (CONFIG_ARC_MMU_VER >= 4) */ } -static inline void __slc_line_loop(unsigned long paddr, unsigned long sz, - const int op) +static void __slc_entire_op(const int op) { - unsigned int aux_cmd; - int num_lines; + unsigned int ctrl; + + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); -#define SLC_LINE_MASK (~(slc_line_sz - 1)) + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; - aux_cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); - sz += paddr & ~SLC_LINE_MASK; - paddr &= SLC_LINE_MASK; + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1); - num_lines = DIV_ROUND_UP(sz, slc_line_sz); + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_AUX_SLC_CTRL); - while (num_lines-- > 0) { - write_aux_reg(aux_cmd, paddr); - paddr += slc_line_sz; - } + /* Important to wait for flush to complete */ + while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } -static inline void __slc_entire_op(const int cacheop) +static void slc_upper_region_init(void) { - int aux; - unsigned int ctrl_reg = __before_slc_op(cacheop); + /* + * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0 + * as we don't use PAE40. + */ + write_aux_reg(ARC_AUX_SLC_RGN_END1, 0); + write_aux_reg(ARC_AUX_SLC_RGN_START1, 0); +} - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ - aux = ARC_AUX_SLC_INVALIDATE; +static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) +{ + unsigned int ctrl; + unsigned long end; + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ else - aux = ARC_AUX_SLC_FLUSH; + ctrl |= SLC_CTRL_IM; - write_aux_reg(aux, 0x1); + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; - __after_slc_op(cacheop, ctrl_reg); -} + write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); -static inline void __slc_line_op(unsigned long paddr, unsigned long sz, - const int cacheop) -{ - unsigned int ctrl_reg = __before_slc_op(cacheop); - __slc_line_loop(paddr, sz, cacheop); - __after_slc_op(cacheop, ctrl_reg); + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + end = paddr + sz + slc_line_sz - 1; + + /* + * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) + * are always == 0 as we don't use PAE40, so we only setup lower ones + * (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START) + */ + write_aux_reg(ARC_AUX_SLC_RGN_END, end); + write_aux_reg(ARC_AUX_SLC_RGN_START, paddr); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_AUX_SLC_CTRL); + + while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } -#else -#define __slc_entire_op(cacheop) -#define __slc_line_op(paddr, sz, cacheop) -#endif +#endif /* CONFIG_ISA_ARCV2 */ #ifdef CONFIG_ISA_ARCV2 static void read_decode_cache_bcr_arcv2(void) @@ -244,7 +276,17 @@ void cache_init(void) write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); } -#endif + + read_decode_mmu_bcr(); + + /* + * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist + * only if PAE exists in current HW. So we had to check pae_exist + * before using them. + */ + if (slc_exists && pae_exists) + slc_upper_region_init(); +#endif /* CONFIG_ISA_ARCV2 */ } int icache_status(void) @@ -272,7 +314,6 @@ void icache_disable(void) IC_CTRL_CACHE_DISABLE); } -#ifndef CONFIG_SYS_DCACHE_OFF void invalidate_icache_all(void) { /* Any write to IC_IVIC register triggers invalidation of entire I$ */ @@ -287,12 +328,12 @@ void invalidate_icache_all(void) __builtin_arc_nop(); read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ } -} -#else -void invalidate_icache_all(void) -{ -} + +#ifdef CONFIG_ISA_ARCV2 + if (slc_exists) + __slc_entire_op(OP_INV); #endif +} int dcache_status(void) { @@ -419,6 +460,9 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long sz, void invalidate_dcache_range(unsigned long start, unsigned long end) { + if (start >= end) + return; + #ifdef CONFIG_ISA_ARCV2 if (!ioc_exists) #endif @@ -426,12 +470,15 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) #ifdef CONFIG_ISA_ARCV2 if (slc_exists && !ioc_exists) - __slc_line_op(start, end - start, OP_INV); + __slc_rgn_op(start, end - start, OP_INV); #endif } void flush_dcache_range(unsigned long start, unsigned long end) { + if (start >= end) + return; + #ifdef CONFIG_ISA_ARCV2 if (!ioc_exists) #endif @@ -439,7 +486,7 @@ void flush_dcache_range(unsigned long start, unsigned long end) #ifdef CONFIG_ISA_ARCV2 if (slc_exists && !ioc_exists) - __slc_line_op(start, end - start, OP_FLUSH); + __slc_rgn_op(start, end - start, OP_FLUSH); #endif } -- 2.39.5