+/*
+ * Perform one SPI word transfer: write 'data' to the DAT1 register, then
+ * busy-wait until a read of the BUF register no longer has the RXEMPTY flag
+ * set, and return that last BUF value. The complete 32-bit register value is
+ * returned unmasked; the read path in this file stores it through a u8
+ * pointer, which keeps only the low bits. The wait has no timeout.
+ *
+ * This function needs to act like a macro to avoid pipeline reloads in the
+ * loops below. Use always_inline. This gains us about 160KiB/s and the bloat
+ * appears to be zero bytes (da830).
+ */
+__attribute__((always_inline))
+static inline u32 davinci_spi_xfer_data(struct davinci_spi_slave *ds, u32 data)
+{
+ u32 buf_reg_val;
+
+ /* send out data */
+ writel(data, &ds->regs->dat1);
+
+ /*
+  * wait for the data to clock in/out: re-read BUF until RXEMPTY is clear,
+  * keeping the value of the read that ended the loop
+  */
+ while ((buf_reg_val = readl(&ds->regs->buf)) & SPIBUF_RXEMPTY_MASK)
+ ;
+
+ return buf_reg_val;
+}
+
+static int davinci_spi_read(struct spi_slave *slave, unsigned int len,
+ u8 *rxp, unsigned long flags)
+{
+ struct davinci_spi_slave *ds = to_davinci_spi(slave);
+ unsigned int data1_reg_val;
+
+ /*
+  * Receive-only transfer: store 'len' bytes into 'rxp', one
+  * davinci_spi_xfer_data() call per byte, sending a DAT1 word whose data
+  * bits are zero. If 'flags' contains SPI_XFER_END, CS hold is dropped
+  * for the final byte. Always returns 0.
+  *
+  * NOTE(review): with len == 0 the loop below is skipped but the final
+  * transfer still runs and stores one byte to *rxp; callers presumably
+  * guarantee len >= 1 - confirm.
+  *
+  * enable CS hold, CS[n] and clear the data bits
+  */
+ data1_reg_val = ((1 << SPIDAT1_CSHOLD_SHIFT) |
+ (slave->cs << SPIDAT1_CSNR_SHIFT));
+
+ /* wait till TXFULL is deasserted */
+ while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK)
+ ;
+
+ /*
+  * preload the TX buffer to avoid clock starvation
+  *
+  * NOTE(review): this write is in addition to the one made for every
+  * byte below, so DAT1 is written len + 1 times in total; what the
+  * hardware does with the extra word is not visible here - confirm
+  * against the controller documentation.
+  */
+ writel(data1_reg_val, &ds->regs->dat1);
+
+ /*
+  * keep reading 1 byte until only 1 byte left; the u32 returned by the
+  * transfer helper is truncated by the store through the u8 pointer
+  */
+ while ((len--) > 1)
+ *rxp++ = davinci_spi_xfer_data(ds, data1_reg_val);
+
+ /* clear CS hold when we reach the end */
+ if (flags & SPI_XFER_END)
+ data1_reg_val &= ~(1 << SPIDAT1_CSHOLD_SHIFT);
+
+ /* read the last byte */
+ *rxp = davinci_spi_xfer_data(ds, data1_reg_val);
+
+ return 0;
+}
+
+static int davinci_spi_write(struct spi_slave *slave, unsigned int len,
+ const u8 *txp, unsigned long flags)
+{
+ struct davinci_spi_slave *ds = to_davinci_spi(slave);
+ unsigned int data1_reg_val;
+
+ /*
+  * Transmit-only transfer: send 'len' bytes from 'txp', each OR-ed into
+  * the DAT1 control word built below. Values returned by
+  * davinci_spi_xfer_data() are discarded. If 'flags' contains
+  * SPI_XFER_END, CS hold is dropped for the final byte. Always
+  * returns 0.
+  *
+  * NOTE(review): with len == 0 the final transfer below still runs and
+  * reads *txp; callers presumably guarantee len >= 1 - confirm.
+  *
+  * enable CS hold and clear the data bits
+  */
+ data1_reg_val = ((1 << SPIDAT1_CSHOLD_SHIFT) |
+ (slave->cs << SPIDAT1_CSNR_SHIFT));
+
+ /* wait till TXFULL is deasserted */
+ while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK)
+ ;
+
+ /*
+  * preload the TX buffer to avoid clock starvation
+  *
+  * Only done when more than two bytes remain: the first byte is written
+  * directly to DAT1 without waiting for receive data, and is consumed
+  * from the count so the loop below continues with the second byte.
+  */
+ if (len > 2) {
+ writel(data1_reg_val | *txp++, &ds->regs->dat1);
+ len--;
+ }
+
+ /* keep writing 1 byte until only 1 byte left */
+ while ((len--) > 1)
+ davinci_spi_xfer_data(ds, data1_reg_val | *txp++);
+
+ /* clear CS hold when we reach the end */
+ if (flags & SPI_XFER_END)
+ data1_reg_val &= ~(1 << SPIDAT1_CSHOLD_SHIFT);
+
+ /* write the last byte */
+ davinci_spi_xfer_data(ds, data1_reg_val | *txp);
+
+ return 0;
+}
+
+#ifndef CONFIG_SPI_HALF_DUPLEX
+static int davinci_spi_read_write(struct spi_slave *slave, unsigned int len,
+ u8 *rxp, const u8 *txp, unsigned long flags)