* Perhaps we should detect that ?  Nowhere do we actually
  * use dma memcpy for those types of lengths though ...
  */
-static void *dma_memcpy(void *dst, const void *src, size_t count)
+void dma_memcpy_nocache(void *dst, const void *src, size_t count)
 {
        /* Scratchpad cannot be a DMA source or destination */
        if (((unsigned long)src >= L1_SRAM_SCRATCH &&
             (unsigned long)dst < L1_SRAM_SCRATCH_END))
                hang();
 
-       if (dcache_status())
-               blackfin_dcache_flush_range(src, src + count);
-
-       bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+       bfin_write_MDMA_S0_CONFIG(0);
+       bfin_write_MDMA_D0_CONFIG(0);
+       bfin_write_MDMA_D0_IRQ_STATUS(DMA_RUN | DMA_DONE | DMA_ERR);
 
        /* Copy sram functions from sdram to sram */
        /* Setup destination start address */
 
        /* Enable source DMA */
        bfin_write_MDMA_S0_CONFIG(DMAEN);
-       SSYNC();
 
        bfin_write_MDMA_D0_CONFIG(WNR | DMAEN);
+       SSYNC();
 
        while (bfin_read_MDMA_D0_IRQ_STATUS() & DMA_RUN)
-               bfin_write_MDMA_D0_IRQ_STATUS(bfin_read_MDMA_D0_IRQ_STATUS() | DMA_DONE | DMA_ERR);
-       bfin_write_MDMA_D0_IRQ_STATUS(bfin_read_MDMA_D0_IRQ_STATUS() | DMA_DONE | DMA_ERR);
+               continue;
+
+       bfin_write_MDMA_D0_IRQ_STATUS(bfin_read_MDMA_D0_IRQ_STATUS() | DMA_RUN | DMA_DONE | DMA_ERR);
+       bfin_write_MDMA_D0_CONFIG(0);
+       bfin_write_MDMA_S0_CONFIG(0);
+}
+void *dma_memcpy(void *dst, const void *src, size_t count)
+{
+       if (dcache_status())
+               blackfin_dcache_flush_range(src, src + count);
+
+       dma_memcpy_nocache(dst, src, count);
 
        if (icache_status())
                blackfin_icache_flush_range(dst, dst + count);