#ifndef _SUNXI_DISPLAY_H
 #define _SUNXI_DISPLAY_H
 
+struct sunxi_de_fe_reg {
+       u32 enable;                     /* 0x000 */
+       u32 frame_ctrl;                 /* 0x004 */
+       u32 bypass;                     /* 0x008 */
+       u32 algorithm_sel;              /* 0x00c */
+       u32 line_int_ctrl;              /* 0x010 */
+       u8 res0[0x0c];                  /* 0x014 */
+       u32 ch0_addr;                   /* 0x020 */
+       u32 ch1_addr;                   /* 0x024 */
+       u32 ch2_addr;                   /* 0x028 */
+       u32 field_sequence;             /* 0x02c */
+       u32 ch0_offset;                 /* 0x030 */
+       u32 ch1_offset;                 /* 0x034 */
+       u32 ch2_offset;                 /* 0x038 */
+       u8 res1[0x04];                  /* 0x03c */
+       u32 ch0_stride;                 /* 0x040 */
+       u32 ch1_stride;                 /* 0x044 */
+       u32 ch2_stride;                 /* 0x048 */
+       u32 input_fmt;                  /* 0x04c */
+       u32 ch3_addr;                   /* 0x050 */
+       u32 ch4_addr;                   /* 0x054 */
+       u32 ch5_addr;                   /* 0x058 */
+       u32 output_fmt;                 /* 0x05c */
+       u32 int_enable;                 /* 0x060 */
+       u32 int_status;                 /* 0x064 */
+       u32 status;                     /* 0x068 */
+       u8 res2[0x04];                  /* 0x06c */
+       u32 csc_coef00;                 /* 0x070 */
+       u32 csc_coef01;                 /* 0x074 */
+       u32 csc_coef02;                 /* 0x078 */
+       u32 csc_coef03;                 /* 0x07c */
+       u32 csc_coef10;                 /* 0x080 */
+       u32 csc_coef11;                 /* 0x084 */
+       u32 csc_coef12;                 /* 0x088 */
+       u32 csc_coef13;                 /* 0x08c */
+       u32 csc_coef20;                 /* 0x090 */
+       u32 csc_coef21;                 /* 0x094 */
+       u32 csc_coef22;                 /* 0x098 */
+       u32 csc_coef23;                 /* 0x09c */
+       u32 deinterlace_ctrl;           /* 0x0a0 */
+       u32 deinterlace_diag;           /* 0x0a4 */
+       u32 deinterlace_tempdiff;       /* 0x0a8 */
+       u32 deinterlace_sawtooth;       /* 0x0ac */
+       u32 deinterlace_spatcomp;       /* 0x0b0 */
+       u32 deinterlace_burstlen;       /* 0x0b4 */
+       u32 deinterlace_preluma;        /* 0x0b8 */
+       u32 deinterlace_tile_addr;      /* 0x0bc */
+       u32 deinterlace_tile_stride;    /* 0x0c0 */
+       u8 res3[0x0c];                  /* 0x0c4 */
+       u32 wb_stride_enable;           /* 0x0d0 */
+       u32 ch3_stride;                 /* 0x0d4 */
+       u32 ch4_stride;                 /* 0x0d8 */
+       u32 ch5_stride;                 /* 0x0dc */
+       u32 fe_3d_ctrl;                 /* 0x0e0 */
+       u32 fe_3d_ch0_addr;             /* 0x0e4 */
+       u32 fe_3d_ch1_addr;             /* 0x0e8 */
+       u32 fe_3d_ch2_addr;             /* 0x0ec */
+       u32 fe_3d_ch0_offset;           /* 0x0f0 */
+       u32 fe_3d_ch1_offset;           /* 0x0f4 */
+       u32 fe_3d_ch2_offset;           /* 0x0f8 */
+       u8 res4[0x04];                  /* 0x0fc */
+       u32 ch0_insize;                 /* 0x100 */
+       u32 ch0_outsize;                /* 0x104 */
+       u32 ch0_horzfact;               /* 0x108 */
+       u32 ch0_vertfact;               /* 0x10c */
+       u32 ch0_horzphase;              /* 0x110 */
+       u32 ch0_vertphase0;             /* 0x114 */
+       u32 ch0_vertphase1;             /* 0x118 */
+       u8 res5[0x04];                  /* 0x11c */
+       u32 ch0_horztapoffset0;         /* 0x120 */
+       u32 ch0_horztapoffset1;         /* 0x124 */
+       u32 ch0_verttapoffset;          /* 0x128 */
+       u8 res6[0xd4];                  /* 0x12c */
+       u32 ch1_insize;                 /* 0x200 */
+       u32 ch1_outsize;                /* 0x204 */
+       u32 ch1_horzfact;               /* 0x208 */
+       u32 ch1_vertfact;               /* 0x20c */
+       u32 ch1_horzphase;              /* 0x210 */
+       u32 ch1_vertphase0;             /* 0x214 */
+       u32 ch1_vertphase1;             /* 0x218 */
+       u8 res7[0x04];                  /* 0x21c */
+       u32 ch1_horztapoffset0;         /* 0x220 */
+       u32 ch1_horztapoffset1;         /* 0x224 */
+       u32 ch1_verttapoffset;          /* 0x228 */
+       u8 res8[0x1d4];                 /* 0x22c */
+       u32 ch0_horzcoef0[32];          /* 0x400 */
+       u32 ch0_horzcoef1[32];          /* 0x480 */
+       u32 ch0_vertcoef[32];           /* 0x500 */
+       u8 res9[0x80];                  /* 0x580 */
+       u32 ch1_horzcoef0[32];          /* 0x600 */
+       u32 ch1_horzcoef1[32];          /* 0x680 */
+       u32 ch1_vertcoef[32];           /* 0x700 */
+       u8 res10[0x280];                /* 0x780 */
+       u32 vpp_enable;                 /* 0xa00 */
+       u32 vpp_dcti;                   /* 0xa04 */
+       u32 vpp_lp1;                    /* 0xa08 */
+       u32 vpp_lp2;                    /* 0xa0c */
+       u32 vpp_wle;                    /* 0xa10 */
+       u32 vpp_ble;                    /* 0xa14 */
+};
+
 struct sunxi_de_be_reg {
        u8 res0[0x800];                 /* 0x000 */
        u32 mode;                       /* 0x800 */
        u32 cfg2;                       /* 0x13c */
 };
 
+/*
+ * DE-FE register constants.
+ */
+#define SUNXI_DE_FE_WIDTH(x)                   (((x) - 1) << 0)
+#define SUNXI_DE_FE_HEIGHT(y)                  (((y) - 1) << 16)
+#define SUNXI_DE_FE_FACTOR_INT(n)              ((n) << 16)
+#define SUNXI_DE_FE_ENABLE_EN                  (1 << 0)
+#define SUNXI_DE_FE_FRAME_CTRL_REG_RDY         (1 << 0)
+#define SUNXI_DE_FE_FRAME_CTRL_COEF_RDY                (1 << 1)
+#define SUNXI_DE_FE_FRAME_CTRL_FRM_START       (1 << 16)
+#define SUNXI_DE_FE_BYPASS_CSC_BYPASS          (1 << 1)
+#define SUNXI_DE_FE_INPUT_FMT_ARGB8888         0x00000151
+#define SUNXI_DE_FE_OUTPUT_FMT_ARGB8888                0x00000002
+
 /*
  * DE-BE register constants.
  */
 #define SUNXI_DE_BE_MODE_LAYER0_ENABLE         (1 << 8)
 #define SUNXI_DE_BE_LAYER_STRIDE(x)            ((x) << 5)
 #define SUNXI_DE_BE_REG_CTRL_LOAD_REGS         (1 << 0)
+#define SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0                0x00000002
 #define SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888   (0x09 << 8)
 
 /*
 
 
 #endif /* CONFIG_VIDEO_HDMI */
 
+#ifdef CONFIG_MACH_SUN4I
+/*
+ * Testing has shown that on sun4i the display backend engine does not have
+ * deep enough fifo-s causing flickering / tearing in full-hd mode due to
+ * fifo underruns. So on sun4i we use the display frontend engine to do the
+ * dma from memory, as the frontend does have deep enough fifo-s.
+ */
+
+static const u32 sun4i_vert_coef[32] = {
+       0x00004000, 0x000140ff, 0x00033ffe, 0x00043ffd,
+       0x00063efc, 0xff083dfc, 0x000a3bfb, 0xff0d39fb,
+       0xff0f37fb, 0xff1136fa, 0xfe1433fb, 0xfe1631fb,
+       0xfd192ffb, 0xfd1c2cfb, 0xfd1f29fb, 0xfc2127fc,
+       0xfc2424fc, 0xfc2721fc, 0xfb291ffd, 0xfb2c1cfd,
+       0xfb2f19fd, 0xfb3116fe, 0xfb3314fe, 0xfa3611ff,
+       0xfb370fff, 0xfb390dff, 0xfb3b0a00, 0xfc3d08ff,
+       0xfc3e0600, 0xfd3f0400, 0xfe3f0300, 0xff400100,
+};
+
+static const u32 sun4i_horz_coef[64] = {
+       0x40000000, 0x00000000, 0x40fe0000, 0x0000ff03,
+       0x3ffd0000, 0x0000ff05, 0x3ffc0000, 0x0000ff06,
+       0x3efb0000, 0x0000ff08, 0x3dfb0000, 0x0000ff09,
+       0x3bfa0000, 0x0000fe0d, 0x39fa0000, 0x0000fe0f,
+       0x38fa0000, 0x0000fe10, 0x36fa0000, 0x0000fe12,
+       0x33fa0000, 0x0000fd16, 0x31fa0000, 0x0000fd18,
+       0x2ffa0000, 0x0000fd1a, 0x2cfa0000, 0x0000fc1e,
+       0x29fa0000, 0x0000fc21, 0x27fb0000, 0x0000fb23,
+       0x24fb0000, 0x0000fb26, 0x21fb0000, 0x0000fb29,
+       0x1ffc0000, 0x0000fa2b, 0x1cfc0000, 0x0000fa2e,
+       0x19fd0000, 0x0000fa30, 0x16fd0000, 0x0000fa33,
+       0x14fd0000, 0x0000fa35, 0x11fe0000, 0x0000fa37,
+       0x0ffe0000, 0x0000fa39, 0x0dfe0000, 0x0000fa3b,
+       0x0afe0000, 0x0000fa3e, 0x08ff0000, 0x0000fb3e,
+       0x06ff0000, 0x0000fb40, 0x05ff0000, 0x0000fc40,
+       0x03ff0000, 0x0000fd41, 0x01ff0000, 0x0000fe42,
+};
+
+static void sunxi_frontend_init(void)
+{
+       struct sunxi_ccm_reg * const ccm =
+               (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+       int i;
+
+       /* Clocks on */
+       setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_FE0);
+       setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_FE0);
+       clock_set_de_mod_clock(&ccm->fe0_clk_cfg, 300000000);
+
+       setbits_le32(&de_fe->enable, SUNXI_DE_FE_ENABLE_EN);
+
+       for (i = 0; i < 32; i++) {
+               writel(sun4i_horz_coef[2 * i], &de_fe->ch0_horzcoef0[i]);
+               writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch0_horzcoef1[i]);
+               writel(sun4i_vert_coef[i], &de_fe->ch0_vertcoef[i]);
+               writel(sun4i_horz_coef[2 * i], &de_fe->ch1_horzcoef0[i]);
+               writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch1_horzcoef1[i]);
+               writel(sun4i_vert_coef[i], &de_fe->ch1_vertcoef[i]);
+       }
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_COEF_RDY);
+}
+
+static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode,
+                                   unsigned int address)
+{
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+
+       setbits_le32(&de_fe->bypass, SUNXI_DE_FE_BYPASS_CSC_BYPASS);
+       writel(CONFIG_SYS_SDRAM_BASE + address, &de_fe->ch0_addr);
+       writel(mode->xres * 4, &de_fe->ch0_stride);
+       writel(SUNXI_DE_FE_INPUT_FMT_ARGB8888, &de_fe->input_fmt);
+       writel(SUNXI_DE_FE_OUTPUT_FMT_ARGB8888, &de_fe->output_fmt);
+
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch0_insize);
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch0_outsize);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_horzfact);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_vertfact);
+
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch1_insize);
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch1_outsize);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_horzfact);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_vertfact);
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_REG_RDY);
+}
+
+static void sunxi_frontend_enable(void)
+{
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_FRM_START);
+}
+#else
+static void sunxi_frontend_init(void) {}
+static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode,
+                                   unsigned int address) {}
+static void sunxi_frontend_enable(void) {}
+#endif
+
 /*
  * This is the entity that mixes and matches the different layers and inputs.
  * Allwinner calls it the back-end, but i like composer better.
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
        int i;
 
+       sunxi_frontend_init();
+
 #if defined CONFIG_MACH_SUN6I || defined CONFIG_MACH_SUN8I
        /* Reset off */
        setbits_le32(&ccm->ahb_reset1_cfg, 1 << AHB_RESET_OFFSET_DE_BE0);
 
        /* Clocks on */
        setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_BE0);
+#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */
        setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_BE0);
+#endif
        clock_set_de_mod_clock(&ccm->be0_clk_cfg, 300000000);
 
        /* Engine bug, clear registers after reset */
        struct sunxi_de_be_reg * const de_be =
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
 
+       sunxi_frontend_mode_set(mode, address);
+
        writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres),
               &de_be->disp_size);
        writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres),
               &de_be->layer0_size);
+#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */
        writel(SUNXI_DE_BE_LAYER_STRIDE(mode->xres), &de_be->layer0_stride);
        writel(address << 3, &de_be->layer0_addr_low32b);
        writel(address >> 29, &de_be->layer0_addr_high4b);
+#else
+       writel(SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0, &de_be->layer0_attr0_ctrl);
+#endif
        writel(SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888, &de_be->layer0_attr1_ctrl);
 
        setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_LAYER0_ENABLE);
        struct sunxi_de_be_reg * const de_be =
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
 
+       sunxi_frontend_enable();
+
        setbits_le32(&de_be->reg_ctrl, SUNXI_DE_BE_REG_CTRL_LOAD_REGS);
        setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_START);
 }
        int offset, ret;
        const char *pipeline = NULL;
 
+#ifdef CONFIG_MACH_SUN4I
+#define PIPELINE_PREFIX "de_fe0-"
+#else
+#define PIPELINE_PREFIX
+#endif
+
        switch (sunxi_display.monitor) {
        case sunxi_monitor_none:
                return 0;
        case sunxi_monitor_dvi:
        case sunxi_monitor_hdmi:
-               pipeline = "de_be0-lcd0-hdmi";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0-hdmi";
                break;
        case sunxi_monitor_lcd:
-               pipeline = "de_be0-lcd0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0";
                break;
        case sunxi_monitor_vga:
 #ifdef CONFIG_VIDEO_VGA
-               pipeline = "de_be0-lcd0-tve0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0-tve0";
 #elif defined CONFIG_VIDEO_VGA_VIA_LCD
-               pipeline = "de_be0-lcd0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0";
 #endif
                break;
        }