From 7cd6f92d41a4bb364eb223f216334c0676eb7e97 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Jan 2015 08:44:07 +0100 Subject: [PATCH] sunxi: video: Use frontend for dma on sun4i to fix memory bandwidth problems Testing has shown that on sun4i the display backend engine does not have deep enough fifo-s causing flickering / tearing in full-hd mode due to fifo underruns. On sun4i use the display frontend engine to do the dma from memory, as the frontend does have deep enough fifo-s. As added advantage of this is that it results in much better memory bandwidth as it reduces the amount of dram bank switches, for more details see: http://ssvb.github.io/2014/11/11/revisiting-fullhd-x11-desktop-performance-of-the-allwinner-a10.html Note that this changes the pipeline searched for in the simplefb node, we can get away with doing this now, since no kernel has yet shipped with simplefb dtb nodes, and I will make sure to get a simplefb node with the new pipeline into 3.19 before it ships. Signed-off-by: Hans de Goede Acked-by: Ian Campbell --- arch/arm/include/asm/arch-sunxi/clock_sun4i.h | 4 + arch/arm/include/asm/arch-sunxi/display.h | 116 +++++++++++++++ drivers/video/sunxi_display.c | 134 +++++++++++++++++- 3 files changed, 250 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/arch-sunxi/clock_sun4i.h b/arch/arm/include/asm/arch-sunxi/clock_sun4i.h index 5ebf8564d4..05fbad3e11 100644 --- a/arch/arm/include/asm/arch-sunxi/clock_sun4i.h +++ b/arch/arm/include/asm/arch-sunxi/clock_sun4i.h @@ -186,6 +186,7 @@ struct sunxi_ccm_reg { /* ahb clock gate bit offset (second register) */ #define AHB_GATE_OFFSET_GMAC 17 +#define AHB_GATE_OFFSET_DE_FE0 14 #define AHB_GATE_OFFSET_DE_BE0 12 #define AHB_GATE_OFFSET_HDMI 11 #define AHB_GATE_OFFSET_LCD1 5 @@ -266,7 +267,10 @@ struct sunxi_ccm_reg { #define CCM_MMC_CTRL_PLL5 (0x2 << 24) #define CCM_MMC_CTRL_ENABLE (0x1 << 31) +#define CCM_DRAM_GATE_OFFSET_DE_FE1 24 /* Note the order of FE1 and */ +#define CCM_DRAM_GATE_OFFSET_DE_FE0 25 /* FE0 is swapped ! */ #define CCM_DRAM_GATE_OFFSET_DE_BE0 26 +#define CCM_DRAM_GATE_OFFSET_DE_BE1 27 #define CCM_LCD_CH0_CTRL_PLL3 (0 << 24) #define CCM_LCD_CH0_CTRL_PLL7 (1 << 24) diff --git a/arch/arm/include/asm/arch-sunxi/display.h b/arch/arm/include/asm/arch-sunxi/display.h index ff92a10c40..5e94253203 100644 --- a/arch/arm/include/asm/arch-sunxi/display.h +++ b/arch/arm/include/asm/arch-sunxi/display.h @@ -9,6 +9,107 @@ #ifndef _SUNXI_DISPLAY_H #define _SUNXI_DISPLAY_H +struct sunxi_de_fe_reg { + u32 enable; /* 0x000 */ + u32 frame_ctrl; /* 0x004 */ + u32 bypass; /* 0x008 */ + u32 algorithm_sel; /* 0x00c */ + u32 line_int_ctrl; /* 0x010 */ + u8 res0[0x0c]; /* 0x014 */ + u32 ch0_addr; /* 0x020 */ + u32 ch1_addr; /* 0x024 */ + u32 ch2_addr; /* 0x028 */ + u32 field_sequence; /* 0x02c */ + u32 ch0_offset; /* 0x030 */ + u32 ch1_offset; /* 0x034 */ + u32 ch2_offset; /* 0x038 */ + u8 res1[0x04]; /* 0x03c */ + u32 ch0_stride; /* 0x040 */ + u32 ch1_stride; /* 0x044 */ + u32 ch2_stride; /* 0x048 */ + u32 input_fmt; /* 0x04c */ + u32 ch3_addr; /* 0x050 */ + u32 ch4_addr; /* 0x054 */ + u32 ch5_addr; /* 0x058 */ + u32 output_fmt; /* 0x05c */ + u32 int_enable; /* 0x060 */ + u32 int_status; /* 0x064 */ + u32 status; /* 0x068 */ + u8 res2[0x04]; /* 0x06c */ + u32 csc_coef00; /* 0x070 */ + u32 csc_coef01; /* 0x074 */ + u32 csc_coef02; /* 0x078 */ + u32 csc_coef03; /* 0x07c */ + u32 csc_coef10; /* 0x080 */ + u32 csc_coef11; /* 0x084 */ + u32 csc_coef12; /* 0x088 */ + u32 csc_coef13; /* 0x08c */ + u32 csc_coef20; /* 0x090 */ + u32 csc_coef21; /* 0x094 */ + u32 csc_coef22; /* 0x098 */ + u32 csc_coef23; /* 0x09c */ + u32 deinterlace_ctrl; /* 0x0a0 */ + u32 deinterlace_diag; /* 0x0a4 */ + u32 deinterlace_tempdiff; /* 0x0a8 */ + u32 deinterlace_sawtooth; /* 0x0ac */ + u32 deinterlace_spatcomp; /* 0x0b0 */ + u32 deinterlace_burstlen; /* 0x0b4 */ + u32 deinterlace_preluma; /* 0x0b8 */ + u32 deinterlace_tile_addr; /* 0x0bc */ + u32 deinterlace_tile_stride; /* 0x0c0 */ + u8 res3[0x0c]; /* 0x0c4 */ + u32 wb_stride_enable; /* 0x0d0 */ + u32 ch3_stride; /* 0x0d4 */ + u32 ch4_stride; /* 0x0d8 */ + u32 ch5_stride; /* 0x0dc */ + u32 fe_3d_ctrl; /* 0x0e0 */ + u32 fe_3d_ch0_addr; /* 0x0e4 */ + u32 fe_3d_ch1_addr; /* 0x0e8 */ + u32 fe_3d_ch2_addr; /* 0x0ec */ + u32 fe_3d_ch0_offset; /* 0x0f0 */ + u32 fe_3d_ch1_offset; /* 0x0f4 */ + u32 fe_3d_ch2_offset; /* 0x0f8 */ + u8 res4[0x04]; /* 0x0fc */ + u32 ch0_insize; /* 0x100 */ + u32 ch0_outsize; /* 0x104 */ + u32 ch0_horzfact; /* 0x108 */ + u32 ch0_vertfact; /* 0x10c */ + u32 ch0_horzphase; /* 0x110 */ + u32 ch0_vertphase0; /* 0x114 */ + u32 ch0_vertphase1; /* 0x118 */ + u8 res5[0x04]; /* 0x11c */ + u32 ch0_horztapoffset0; /* 0x120 */ + u32 ch0_horztapoffset1; /* 0x124 */ + u32 ch0_verttapoffset; /* 0x128 */ + u8 res6[0xd4]; /* 0x12c */ + u32 ch1_insize; /* 0x200 */ + u32 ch1_outsize; /* 0x204 */ + u32 ch1_horzfact; /* 0x208 */ + u32 ch1_vertfact; /* 0x20c */ + u32 ch1_horzphase; /* 0x210 */ + u32 ch1_vertphase0; /* 0x214 */ + u32 ch1_vertphase1; /* 0x218 */ + u8 res7[0x04]; /* 0x21c */ + u32 ch1_horztapoffset0; /* 0x220 */ + u32 ch1_horztapoffset1; /* 0x224 */ + u32 ch1_verttapoffset; /* 0x228 */ + u8 res8[0x1d4]; /* 0x22c */ + u32 ch0_horzcoef0[32]; /* 0x400 */ + u32 ch0_horzcoef1[32]; /* 0x480 */ + u32 ch0_vertcoef[32]; /* 0x500 */ + u8 res9[0x80]; /* 0x580 */ + u32 ch1_horzcoef0[32]; /* 0x600 */ + u32 ch1_horzcoef1[32]; /* 0x680 */ + u32 ch1_vertcoef[32]; /* 0x700 */ + u8 res10[0x280]; /* 0x780 */ + u32 vpp_enable; /* 0xa00 */ + u32 vpp_dcti; /* 0xa04 */ + u32 vpp_lp1; /* 0xa08 */ + u32 vpp_lp2; /* 0xa0c */ + u32 vpp_wle; /* 0xa10 */ + u32 vpp_ble; /* 0xa14 */ +}; + struct sunxi_de_be_reg { u8 res0[0x800]; /* 0x000 */ u32 mode; /* 0x800 */ @@ -209,6 +310,20 @@ struct sunxi_tve_reg { u32 cfg2; /* 0x13c */ }; +/* + * DE-FE register constants. + */ +#define SUNXI_DE_FE_WIDTH(x) (((x) - 1) << 0) +#define SUNXI_DE_FE_HEIGHT(y) (((y) - 1) << 16) +#define SUNXI_DE_FE_FACTOR_INT(n) ((n) << 16) +#define SUNXI_DE_FE_ENABLE_EN (1 << 0) +#define SUNXI_DE_FE_FRAME_CTRL_REG_RDY (1 << 0) +#define SUNXI_DE_FE_FRAME_CTRL_COEF_RDY (1 << 1) +#define SUNXI_DE_FE_FRAME_CTRL_FRM_START (1 << 16) +#define SUNXI_DE_FE_BYPASS_CSC_BYPASS (1 << 1) +#define SUNXI_DE_FE_INPUT_FMT_ARGB8888 0x00000151 +#define SUNXI_DE_FE_OUTPUT_FMT_ARGB8888 0x00000002 + /* * DE-BE register constants. */ @@ -219,6 +334,7 @@ struct sunxi_tve_reg { #define SUNXI_DE_BE_MODE_LAYER0_ENABLE (1 << 8) #define SUNXI_DE_BE_LAYER_STRIDE(x) ((x) << 5) #define SUNXI_DE_BE_REG_CTRL_LOAD_REGS (1 << 0) +#define SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0 0x00000002 #define SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888 (0x09 << 8) /* diff --git a/drivers/video/sunxi_display.c b/drivers/video/sunxi_display.c index 0505f3c963..a6e3778ffe 100644 --- a/drivers/video/sunxi_display.c +++ b/drivers/video/sunxi_display.c @@ -271,6 +271,114 @@ static int sunxi_hdmi_edid_get_mode(struct ctfb_res_modes *mode) #endif /* CONFIG_VIDEO_HDMI */ +#ifdef CONFIG_MACH_SUN4I +/* + * Testing has shown that on sun4i the display backend engine does not have + * deep enough fifo-s causing flickering / tearing in full-hd mode due to + * fifo underruns. So on sun4i we use the display frontend engine to do the + * dma from memory, as the frontend does have deep enough fifo-s. + */ + +static const u32 sun4i_vert_coef[32] = { + 0x00004000, 0x000140ff, 0x00033ffe, 0x00043ffd, + 0x00063efc, 0xff083dfc, 0x000a3bfb, 0xff0d39fb, + 0xff0f37fb, 0xff1136fa, 0xfe1433fb, 0xfe1631fb, + 0xfd192ffb, 0xfd1c2cfb, 0xfd1f29fb, 0xfc2127fc, + 0xfc2424fc, 0xfc2721fc, 0xfb291ffd, 0xfb2c1cfd, + 0xfb2f19fd, 0xfb3116fe, 0xfb3314fe, 0xfa3611ff, + 0xfb370fff, 0xfb390dff, 0xfb3b0a00, 0xfc3d08ff, + 0xfc3e0600, 0xfd3f0400, 0xfe3f0300, 0xff400100, +}; + +static const u32 sun4i_horz_coef[64] = { + 0x40000000, 0x00000000, 0x40fe0000, 0x0000ff03, + 0x3ffd0000, 0x0000ff05, 0x3ffc0000, 0x0000ff06, + 0x3efb0000, 0x0000ff08, 0x3dfb0000, 0x0000ff09, + 0x3bfa0000, 0x0000fe0d, 0x39fa0000, 0x0000fe0f, + 0x38fa0000, 0x0000fe10, 0x36fa0000, 0x0000fe12, + 0x33fa0000, 0x0000fd16, 0x31fa0000, 0x0000fd18, + 0x2ffa0000, 0x0000fd1a, 0x2cfa0000, 0x0000fc1e, + 0x29fa0000, 0x0000fc21, 0x27fb0000, 0x0000fb23, + 0x24fb0000, 0x0000fb26, 0x21fb0000, 0x0000fb29, + 0x1ffc0000, 0x0000fa2b, 0x1cfc0000, 0x0000fa2e, + 0x19fd0000, 0x0000fa30, 0x16fd0000, 0x0000fa33, + 0x14fd0000, 0x0000fa35, 0x11fe0000, 0x0000fa37, + 0x0ffe0000, 0x0000fa39, 0x0dfe0000, 0x0000fa3b, + 0x0afe0000, 0x0000fa3e, 0x08ff0000, 0x0000fb3e, + 0x06ff0000, 0x0000fb40, 0x05ff0000, 0x0000fc40, + 0x03ff0000, 0x0000fd41, 0x01ff0000, 0x0000fe42, +}; + +static void sunxi_frontend_init(void) +{ + struct sunxi_ccm_reg * const ccm = + (struct sunxi_ccm_reg *)SUNXI_CCM_BASE; + struct sunxi_de_fe_reg * const de_fe = + (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE; + int i; + + /* Clocks on */ + setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_FE0); + setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_FE0); + clock_set_de_mod_clock(&ccm->fe0_clk_cfg, 300000000); + + setbits_le32(&de_fe->enable, SUNXI_DE_FE_ENABLE_EN); + + for (i = 0; i < 32; i++) { + writel(sun4i_horz_coef[2 * i], &de_fe->ch0_horzcoef0[i]); + writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch0_horzcoef1[i]); + writel(sun4i_vert_coef[i], &de_fe->ch0_vertcoef[i]); + writel(sun4i_horz_coef[2 * i], &de_fe->ch1_horzcoef0[i]); + writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch1_horzcoef1[i]); + writel(sun4i_vert_coef[i], &de_fe->ch1_vertcoef[i]); + } + + setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_COEF_RDY); +} + +static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode, + unsigned int address) +{ + struct sunxi_de_fe_reg * const de_fe = + (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE; + + setbits_le32(&de_fe->bypass, SUNXI_DE_FE_BYPASS_CSC_BYPASS); + writel(CONFIG_SYS_SDRAM_BASE + address, &de_fe->ch0_addr); + writel(mode->xres * 4, &de_fe->ch0_stride); + writel(SUNXI_DE_FE_INPUT_FMT_ARGB8888, &de_fe->input_fmt); + writel(SUNXI_DE_FE_OUTPUT_FMT_ARGB8888, &de_fe->output_fmt); + + writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres), + &de_fe->ch0_insize); + writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres), + &de_fe->ch0_outsize); + writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_horzfact); + writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_vertfact); + + writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres), + &de_fe->ch1_insize); + writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres), + &de_fe->ch1_outsize); + writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_horzfact); + writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_vertfact); + + setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_REG_RDY); +} + +static void sunxi_frontend_enable(void) +{ + struct sunxi_de_fe_reg * const de_fe = + (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE; + + setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_FRM_START); +} +#else +static void sunxi_frontend_init(void) {} +static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode, + unsigned int address) {} +static void sunxi_frontend_enable(void) {} +#endif + /* * This is the entity that mixes and matches the different layers and inputs. * Allwinner calls it the back-end, but i like composer better. @@ -283,6 +391,8 @@ static void sunxi_composer_init(void) (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE; int i; + sunxi_frontend_init(); + #if defined CONFIG_MACH_SUN6I || defined CONFIG_MACH_SUN8I /* Reset off */ setbits_le32(&ccm->ahb_reset1_cfg, 1 << AHB_RESET_OFFSET_DE_BE0); @@ -290,7 +400,9 @@ static void sunxi_composer_init(void) /* Clocks on */ setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_BE0); +#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */ setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_BE0); +#endif clock_set_de_mod_clock(&ccm->be0_clk_cfg, 300000000); /* Engine bug, clear registers after reset */ @@ -306,13 +418,19 @@ static void sunxi_composer_mode_set(const struct ctfb_res_modes *mode, struct sunxi_de_be_reg * const de_be = (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE; + sunxi_frontend_mode_set(mode, address); + writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres), &de_be->disp_size); writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres), &de_be->layer0_size); +#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */ writel(SUNXI_DE_BE_LAYER_STRIDE(mode->xres), &de_be->layer0_stride); writel(address << 3, &de_be->layer0_addr_low32b); writel(address >> 29, &de_be->layer0_addr_high4b); +#else + writel(SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0, &de_be->layer0_attr0_ctrl); +#endif writel(SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888, &de_be->layer0_attr1_ctrl); setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_LAYER0_ENABLE); @@ -323,6 +441,8 @@ static void sunxi_composer_enable(void) struct sunxi_de_be_reg * const de_be = (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE; + sunxi_frontend_enable(); + setbits_le32(&de_be->reg_ctrl, SUNXI_DE_BE_REG_CTRL_LOAD_REGS); setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_START); } @@ -1060,21 +1180,27 @@ int sunxi_simplefb_setup(void *blob) int offset, ret; const char *pipeline = NULL; +#ifdef CONFIG_MACH_SUN4I +#define PIPELINE_PREFIX "de_fe0-" +#else +#define PIPELINE_PREFIX +#endif + switch (sunxi_display.monitor) { case sunxi_monitor_none: return 0; case sunxi_monitor_dvi: case sunxi_monitor_hdmi: - pipeline = "de_be0-lcd0-hdmi"; + pipeline = PIPELINE_PREFIX "de_be0-lcd0-hdmi"; break; case sunxi_monitor_lcd: - pipeline = "de_be0-lcd0"; + pipeline = PIPELINE_PREFIX "de_be0-lcd0"; break; case sunxi_monitor_vga: #ifdef CONFIG_VIDEO_VGA - pipeline = "de_be0-lcd0-tve0"; + pipeline = PIPELINE_PREFIX "de_be0-lcd0-tve0"; #elif defined CONFIG_VIDEO_VGA_VIA_LCD - pipeline = "de_be0-lcd0"; + pipeline = PIPELINE_PREFIX "de_be0-lcd0"; #endif break; } -- 2.39.5