]> git.sur5r.net Git - u-boot/blobdiff - arch/mips/include/asm/io.h
SPDX: Convert all of our single license tags to Linux Kernel style
[u-boot] / arch / mips / include / asm / io.h
index 80eab75e153afdfff91beeea87152291edf54426..957442effd23ea1f920990b718028685ac02b2fe 100644 (file)
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
  * Copyright (C) 1994, 1995 Waldorf GmbH
- * Copyright (C) 1994 - 2000 Ralf Baechle
+ * Copyright (C) 1994 - 2000, 06 Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- * Copyright (C) 2000 FSMLabs, Inc.
+ * Copyright (C) 2004, 2005  MIPS Technologies, Inc.  All rights reserved.
+ *     Author: Maciej W. Rozycki <macro@mips.com>
  */
 #ifndef _ASM_IO_H
 #define _ASM_IO_H
 
-#include <linux/config.h>
-#if 0
-#include <linux/pagemap.h>
-#endif
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
 #include <asm/addrspace.h>
 #include <asm/byteorder.h>
+#include <asm/cpu-features.h>
+#include <asm/pgtable-bits.h>
+#include <asm/processor.h>
+#include <asm/string.h>
 
-/*
- * Slowdown I/O port space accesses for antique hardware.
- */
-#undef CONF_SLOWDOWN_IO
+#include <ioremap.h>
+#include <mangle-port.h>
+#include <spaces.h>
 
 /*
- * Sane hardware offers swapping of I/O space accesses in hardware; less
- * sane hardware forces software to fiddle with this ...
+ * Raw operations are never swapped in software.  OTOH values that raw
+ * operations are working on may or may not have been swapped by the bus
+ * hardware.  An example use would be for flash memory that's used for
+ * execute in place.
  */
-#if defined(CONFIG_SWAP_IO_SPACE) && defined(__MIPSEB__)
+# define __raw_ioswabb(a, x)   (x)
+# define __raw_ioswabw(a, x)   (x)
+# define __raw_ioswabl(a, x)   (x)
+# define __raw_ioswabq(a, x)   (x)
+# define ____raw_ioswabq(a, x) (x)
 
-#define __ioswab8(x) (x)
-#define __ioswab16(x) swab16(x)
-#define __ioswab32(x) swab32(x)
+/* ioswab[bwlq], __mem_ioswab[bwlq] are defined in mangle-port.h */
 
-#else
+#define IO_SPACE_LIMIT 0xffff
 
-#define __ioswab8(x) (x)
-#define __ioswab16(x) (x)
-#define __ioswab32(x) (x)
+#ifdef CONFIG_DYNAMIC_IO_PORT_BASE
 
-#endif
+static inline ulong mips_io_port_base(void)
+{
+       DECLARE_GLOBAL_DATA_PTR;
 
-/*
- * This file contains the definitions for the MIPS counterpart of the
- * x86 in/out instructions. This heap of macros and C results in much
- * better code than the approach of doing it in plain C.  The macros
- * result in code that is to fast for certain hardware.  On the other
- * side the performance of the string functions should be improved for
- * sake of certain devices like EIDE disks that do highspeed polled I/O.
- *
- *   Ralf
- *
- * This file contains the definitions for the x86 IO instructions
- * inb/inw/inl/outb/outw/outl and the "string versions" of the same
- * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
- * versions of the single-IO instructions (inb_p/inw_p/..).
- *
- * This file is not meant to be obfuscating: it's just complicated
- * to (a) handle it all in a way that makes gcc able to optimize it
- * as well as possible and (b) trying to avoid writing the same thing
- * over and over again with slight variations and possibly making a
- * mistake somewhere.
- */
+       return gd->arch.io_port_base;
+}
 
-/*
- * On MIPS I/O ports are memory mapped, so we access them using normal
- * load/store instructions. mips_io_port_base is the virtual address to
- * which all ports are being mapped.  For sake of efficiency some code
- * assumes that this is an address that can be loaded with a single lui
- * instruction, so the lower 16 bits must be zero.  Should be true on
- * on any sane architecture; generic code does not use this assumption.
- */
-extern const unsigned long mips_io_port_base;
+static inline void set_io_port_base(unsigned long base)
+{
+       DECLARE_GLOBAL_DATA_PTR;
+
+       gd->arch.io_port_base = base;
+       barrier();
+}
+
+#else /* !CONFIG_DYNAMIC_IO_PORT_BASE */
+
+static inline ulong mips_io_port_base(void)
+{
+       return 0;
+}
 
-/*
- * Gcc will generate code to load the value of mips_io_port_base after each
- * function call which may be fairly wasteful in some cases.  So we don't
- * play quite by the book.  We tell gcc mips_io_port_base is a long variable
- * which solves the code generation issue.  Now we need to violate the
- * aliasing rules a little to make initialization possible and finally we
- * will need the barrier() to fight side effects of the aliasing chat.
- * This trickery will eventually collapse under gcc's optimizer.  Oh well.
- */
 static inline void set_io_port_base(unsigned long base)
 {
-       * (unsigned long *) &mips_io_port_base = base;
+       BUG_ON(base);
 }
 
+#endif /* !CONFIG_DYNAMIC_IO_PORT_BASE */
+
 /*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
+ *     virt_to_phys    -       map virtual addresses to physical
+ *     @address: address to remap
  *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- *             Linus
+ *     The returned physical address is the physical (CPU) mapping for
+ *     the memory address given. It is only valid to use this function on
+ *     addresses directly mapped or allocated via kmalloc.
  *
+ *     This function does not give bus mappings for DMA transfers. In
+ *     almost all conceivable cases a device driver should not be using
+ *     this function
  */
+static inline unsigned long virt_to_phys(volatile const void *address)
+{
+       unsigned long addr = (unsigned long)address;
 
-#define __SLOW_DOWN_IO \
-       __asm__ __volatile__( \
-               "sb\t$0,0x80(%0)" \
-               : : "r" (mips_io_port_base));
-
-#ifdef CONF_SLOWDOWN_IO
-#ifdef REALLY_SLOW_IO
-#define SLOW_DOWN_IO { __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; }
-#else
-#define SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
-#else
-#define SLOW_DOWN_IO
+       /* this corresponds to kernel implementation of __pa() */
+#ifdef CONFIG_64BIT
+       if (addr < CKSEG0)
+               return XPHYSADDR(addr);
 #endif
+       return CPHYSADDR(addr);
+}
+#define virt_to_phys virt_to_phys
 
 /*
- * Change virtual addresses to physical addresses and vv.
- * These are trivial on the 1:1 Linux/MIPS mapping
+ *     phys_to_virt    -       map physical address to virtual
+ *     @address: address to remap
+ *
+ *     The returned virtual address is a current CPU mapping for
+ *     the memory address given. It is only valid to use this function on
+ *     addresses that have a kernel mapping
+ *
+ *     This function does not handle bus mappings for DMA transfers. In
+ *     almost all conceivable cases a device driver should not be using
+ *     this function
  */
-extern inline phys_addr_t virt_to_phys(volatile void * address)
+static inline void *phys_to_virt(unsigned long address)
 {
-#ifndef CONFIG_64BIT
-       return CPHYSADDR(address);
-#else
-       return XPHYSADDR(address);
-#endif
-}
-
-extern inline void * phys_to_virt(unsigned long address)
-{
-#ifndef CONFIG_64BIT
-       return (void *)KSEG0ADDR(address);
-#else
-       return (void *)CKSEG0ADDR(address);
-#endif
+       return (void *)(address + PAGE_OFFSET - PHYS_OFFSET);
 }
+#define phys_to_virt phys_to_virt
 
 /*
- * IO bus memory addresses are also 1:1 with the physical address
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
  */
-extern inline unsigned long virt_to_bus(volatile void * address)
+static inline unsigned long isa_virt_to_bus(volatile void *address)
 {
-#ifndef CONFIG_64BIT
-       return CPHYSADDR(address);
-#else
-       return XPHYSADDR(address);
-#endif
+       return (unsigned long)address - PAGE_OFFSET;
 }
 
-extern inline void * bus_to_virt(unsigned long address)
+static inline void *isa_bus_to_virt(unsigned long address)
 {
-#ifndef CONFIG_64BIT
-       return (void *)KSEG0ADDR(address);
-#else
-       return (void *)CKSEG0ADDR(address);
-#endif
+       return (void *)(address + PAGE_OFFSET);
 }
 
+#define isa_page_to_bus page_to_phys
+
 /*
- * isa_slot_offset is the address where E(ISA) busaddress 0 is mapped
- * for the processor.
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ *
+ * Allow them for x86 for legacy drivers, though.
  */
-extern unsigned long isa_slot_offset;
-
-extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
 
-#if 0
-extern inline void *ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *__ioremap_mode(phys_addr_t offset, unsigned long size,
+       unsigned long flags)
 {
-       return __ioremap(offset, size, _CACHE_UNCACHED);
-}
+       void __iomem *addr;
+       phys_addr_t phys_addr;
 
-extern inline void *ioremap_nocache(unsigned long offset, unsigned long size)
-{
-       return __ioremap(offset, size, _CACHE_UNCACHED);
-}
+       addr = plat_ioremap(offset, size, flags);
+       if (addr)
+               return addr;
 
-extern void iounmap(void *addr);
-#endif
+       phys_addr = fixup_bigphys_addr(offset, size);
+       return (void __iomem *)(unsigned long)CKSEG1ADDR(phys_addr);
+}
 
 /*
- * XXX We need system specific versions of these to handle EISA address bits
- * 24-31 on SNI.
- * XXX more SNI hacks.
+ * ioremap     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
  */
-#define readb(addr) (*(volatile unsigned char *)(addr))
-#define readw(addr) __ioswab16((*(volatile unsigned short *)(addr)))
-#define readl(addr) __ioswab32((*(volatile unsigned int *)(addr)))
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-
-#define writeb(b,addr) (*(volatile unsigned char *)(addr)) = (b)
-#define writew(b,addr) (*(volatile unsigned short *)(addr)) = (__ioswab16(b))
-#define writel(b,addr) (*(volatile unsigned int *)(addr)) = (__ioswab32(b))
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-#define memset_io(a,b,c)       memset((void *)(a),(b),(c))
-#define memcpy_fromio(a,b,c)   memcpy((a),(void *)(b),(c))
-#define memcpy_toio(a,b,c)     memcpy((void *)(a),(b),(c))
-
-/* END SNI HACKS ... */
+#define ioremap(offset, size)                                          \
+       __ioremap_mode((offset), (size), _CACHE_UNCACHED)
 
 /*
- * ISA space is 'always mapped' on currently supported MIPS systems, no need
- * to explicitly ioremap() it. The fact that the ISA IO space is mapped
- * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
- * are physical addresses. The following constant pointer can be
- * used as the IO-area pointer (it can be iounmapped as well, so the
- * analogy with PCI is quite large):
+ * ioremap_nocache     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
  */
-#define __ISA_IO_base ((char *)(PAGE_OFFSET))
-
-#define isa_readb(a) readb(a)
-#define isa_readw(a) readw(a)
-#define isa_readl(a) readl(a)
-#define isa_writeb(b,a) writeb(b,a)
-#define isa_writew(w,a) writew(w,a)
-#define isa_writel(l,a) writel(l,a)
+#define ioremap_nocache(offset, size)                                  \
+       __ioremap_mode((offset), (size), _CACHE_UNCACHED)
+#define ioremap_uc ioremap_nocache
 
-#define isa_memset_io(a,b,c)     memset_io((a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),(b),(c))
-#define isa_memcpy_toio(a,b,c)   memcpy_toio((a),(b),(c))
+/*
+ * ioremap_cachable -  map bus memory into CPU space
+ * @offset:        bus address of the memory
+ * @size:          size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked cachable by
+ * the CPU.  Also enables full write-combining.         Useful for some
+ * memory-like regions on I/O busses.
+ */
+#define ioremap_cachable(offset, size)                                 \
+       __ioremap_mode((offset), (size), _page_cachable_default)
 
 /*
- * We don't have csum_partial_copy_fromio() yet, so we cheat here and
- * just copy it. The net code will then do the checksum later.
+ * These two are MIPS specific ioremap variant.         ioremap_cacheable_cow
+ * requests a cachable mapping, ioremap_uncached_accelerated requests a
+ * mapping using the uncached accelerated mode which isn't supported on
+ * all processors.
  */
-#define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(b),(c),(d))
+#define ioremap_cacheable_cow(offset, size)                            \
+       __ioremap_mode((offset), (size), _CACHE_CACHABLE_COW)
+#define ioremap_uncached_accelerated(offset, size)                     \
+       __ioremap_mode((offset), (size), _CACHE_UNCACHED_ACCELERATED)
 
-static inline int check_signature(unsigned long io_addr,
-                                 const unsigned char *signature, int length)
+static inline void iounmap(const volatile void __iomem *addr)
 {
-       int retval = 0;
-       do {
-               if (readb(io_addr) != *signature)
-                       goto out;
-               io_addr++;
-               signature++;
-               length--;
-       } while (length);
-       retval = 1;
-out:
-       return retval;
+       plat_iounmap(addr);
 }
-#define isa_check_signature(io, s, l) check_signature(i,s,l)
 
-/*
- * Talk about misusing macros..
- */
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define war_octeon_io_reorder_wmb()            wmb()
+#else
+#define war_octeon_io_reorder_wmb()            do { } while (0)
+#endif
 
-#define __OUT1(s) \
-extern inline void __out##s(unsigned int value, unsigned int port) {
+#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq)                    \
+                                                                       \
+static inline void pfx##write##bwlq(type val,                          \
+                                   volatile void __iomem *mem)         \
+{                                                                      \
+       volatile type *__mem;                                           \
+       type __val;                                                     \
+                                                                       \
+       war_octeon_io_reorder_wmb();                                    \
+                                                                       \
+       __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));    \
+                                                                       \
+       __val = pfx##ioswab##bwlq(__mem, val);                          \
+                                                                       \
+       if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
+               *__mem = __val;                                         \
+       else if (cpu_has_64bits) {                                      \
+               type __tmp;                                             \
+                                                                       \
+               __asm__ __volatile__(                                   \
+                       ".set   arch=r4000"     "\t\t# __writeq""\n\t"  \
+                       "dsll32 %L0, %L0, 0"                    "\n\t"  \
+                       "dsrl32 %L0, %L0, 0"                    "\n\t"  \
+                       "dsll32 %M0, %M0, 0"                    "\n\t"  \
+                       "or     %L0, %L0, %M0"                  "\n\t"  \
+                       "sd     %L0, %2"                        "\n\t"  \
+                       ".set   mips0"                          "\n"    \
+                       : "=r" (__tmp)                                  \
+                       : "0" (__val), "m" (*__mem));                   \
+       } else                                                          \
+               BUG();                                                  \
+}                                                                      \
+                                                                       \
+static inline type pfx##read##bwlq(const volatile void __iomem *mem)   \
+{                                                                      \
+       volatile type *__mem;                                           \
+       type __val;                                                     \
+                                                                       \
+       __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));    \
+                                                                       \
+       if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
+               __val = *__mem;                                         \
+       else if (cpu_has_64bits) {                                      \
+               __asm__ __volatile__(                                   \
+                       ".set   arch=r4000"     "\t\t# __readq" "\n\t"  \
+                       "ld     %L0, %1"                        "\n\t"  \
+                       "dsra32 %M0, %L0, 0"                    "\n\t"  \
+                       "sll    %L0, %L0, 0"                    "\n\t"  \
+                       ".set   mips0"                          "\n"    \
+                       : "=r" (__val)                                  \
+                       : "m" (*__mem));                                \
+       } else {                                                        \
+               __val = 0;                                              \
+               BUG();                                                  \
+       }                                                               \
+                                                                       \
+       return pfx##ioswab##bwlq(__mem, __val);                         \
+}
 
-#define __OUT2(m) \
-__asm__ __volatile__ ("s" #m "\t%0,%1(%2)"
+#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p)                      \
+                                                                       \
+static inline void pfx##out##bwlq##p(type val, unsigned long port)     \
+{                                                                      \
+       volatile type *__addr;                                          \
+       type __val;                                                     \
+                                                                       \
+       war_octeon_io_reorder_wmb();                                    \
+                                                                       \
+       __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base() + port); \
+                                                                       \
+       __val = pfx##ioswab##bwlq(__addr, val);                         \
+                                                                       \
+       /* Really, we want this to be atomic */                         \
+       BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));             \
+                                                                       \
+       *__addr = __val;                                                \
+}                                                                      \
+                                                                       \
+static inline type pfx##in##bwlq##p(unsigned long port)                        \
+{                                                                      \
+       volatile type *__addr;                                          \
+       type __val;                                                     \
+                                                                       \
+       __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base() + port); \
+                                                                       \
+       BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));             \
+                                                                       \
+       __val = *__addr;                                                \
+                                                                       \
+       return pfx##ioswab##bwlq(__addr, __val);                        \
+}
 
-#define __OUT(m,s,w) \
-__OUT1(s) __OUT2(m) : : "r" (__ioswab##w(value)), "i" (0), "r" (mips_io_port_base+port)); } \
-__OUT1(s##c) __OUT2(m) : : "r" (__ioswab##w(value)), "ir" (port), "r" (mips_io_port_base)); } \
-__OUT1(s##_p) __OUT2(m) : : "r" (__ioswab##w(value)), "i" (0), "r" (mips_io_port_base+port)); \
-       SLOW_DOWN_IO; } \
-__OUT1(s##c_p) __OUT2(m) : : "r" (__ioswab##w(value)), "ir" (port), "r" (mips_io_port_base)); \
-       SLOW_DOWN_IO; }
+#define __BUILD_MEMORY_PFX(bus, bwlq, type)                            \
+                                                                       \
+__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1)
+
+#define BUILDIO_MEM(bwlq, type)                                                \
+                                                                       \
+__BUILD_MEMORY_PFX(__raw_, bwlq, type)                                 \
+__BUILD_MEMORY_PFX(, bwlq, type)                                       \
+__BUILD_MEMORY_PFX(__mem_, bwlq, type)                                 \
+
+BUILDIO_MEM(b, u8)
+BUILDIO_MEM(w, u16)
+BUILDIO_MEM(l, u32)
+BUILDIO_MEM(q, u64)
+
+#define __BUILD_IOPORT_PFX(bus, bwlq, type)                            \
+       __BUILD_IOPORT_SINGLE(bus, bwlq, type, )                        \
+       __BUILD_IOPORT_SINGLE(bus, bwlq, type, _p)
+
+#define BUILDIO_IOPORT(bwlq, type)                                     \
+       __BUILD_IOPORT_PFX(, bwlq, type)                                \
+       __BUILD_IOPORT_PFX(__mem_, bwlq, type)
+
+BUILDIO_IOPORT(b, u8)
+BUILDIO_IOPORT(w, u16)
+BUILDIO_IOPORT(l, u32)
+#ifdef CONFIG_64BIT
+BUILDIO_IOPORT(q, u64)
+#endif
 
-#define __IN1(t,s) \
-extern __inline__ t __in##s(unsigned int port) { t _v;
+#define __BUILDIO(bwlq, type)                                          \
+                                                                       \
+__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0)
+
+__BUILDIO(q, u64)
+
+#define readb_relaxed                  readb
+#define readw_relaxed                  readw
+#define readl_relaxed                  readl
+#define readq_relaxed                  readq
+
+#define writeb_relaxed                 writeb
+#define writew_relaxed                 writew
+#define writel_relaxed                 writel
+#define writeq_relaxed                 writeq
+
+#define readb_be(addr)                                                 \
+       __raw_readb((__force unsigned *)(addr))
+#define readw_be(addr)                                                 \
+       be16_to_cpu(__raw_readw((__force unsigned *)(addr)))
+#define readl_be(addr)                                                 \
+       be32_to_cpu(__raw_readl((__force unsigned *)(addr)))
+#define readq_be(addr)                                                 \
+       be64_to_cpu(__raw_readq((__force unsigned *)(addr)))
+
+#define writeb_be(val, addr)                                           \
+       __raw_writeb((val), (__force unsigned *)(addr))
+#define writew_be(val, addr)                                           \
+       __raw_writew(cpu_to_be16((val)), (__force unsigned *)(addr))
+#define writel_be(val, addr)                                           \
+       __raw_writel(cpu_to_be32((val)), (__force unsigned *)(addr))
+#define writeq_be(val, addr)                                           \
+       __raw_writeq(cpu_to_be64((val)), (__force unsigned *)(addr))
 
 /*
- * Required nops will be inserted by the assembler
+ * Some code tests for these symbols
  */
-#define __IN2(m) \
-__asm__ __volatile__ ("l" #m "\t%0,%1(%2)"
-
-#define __IN(t,m,s,w) \
-__IN1(t,s) __IN2(m) : "=r" (_v) : "i" (0), "r" (mips_io_port_base+port)); return __ioswab##w(_v); } \
-__IN1(t,s##c) __IN2(m) : "=r" (_v) : "ir" (port), "r" (mips_io_port_base)); return __ioswab##w(_v); } \
-__IN1(t,s##_p) __IN2(m) : "=r" (_v) : "i" (0), "r" (mips_io_port_base+port)); SLOW_DOWN_IO; return __ioswab##w(_v); } \
-__IN1(t,s##c_p) __IN2(m) : "=r" (_v) : "ir" (port), "r" (mips_io_port_base)); SLOW_DOWN_IO; return __ioswab##w(_v); }
-
-#define __INS1(s) \
-extern inline void __ins##s(unsigned int port, void * addr, unsigned long count) {
-
-#define __INS2(m) \
-if (count) \
-__asm__ __volatile__ ( \
-       ".set\tnoreorder\n\t" \
-       ".set\tnoat\n" \
-       "1:\tl" #m "\t$1,%4(%5)\n\t" \
-       "subu\t%1,1\n\t" \
-       "s" #m "\t$1,(%0)\n\t" \
-       "bne\t$0,%1,1b\n\t" \
-       "addiu\t%0,%6\n\t" \
-       ".set\tat\n\t" \
-       ".set\treorder"
-
-#define __INS(m,s,i) \
-__INS1(s) __INS2(m) \
-       : "=r" (addr), "=r" (count) \
-       : "0" (addr), "1" (count), "i" (0), \
-         "r" (mips_io_port_base+port), "I" (i) \
-       : "$1");} \
-__INS1(s##c) __INS2(m) \
-       : "=r" (addr), "=r" (count) \
-       : "0" (addr), "1" (count), "ir" (port), \
-         "r" (mips_io_port_base), "I" (i) \
-       : "$1");}
-
-#define __OUTS1(s) \
-extern inline void __outs##s(unsigned int port, const void * addr, unsigned long count) {
-
-#define __OUTS2(m) \
-if (count) \
-__asm__ __volatile__ ( \
-       ".set\tnoreorder\n\t" \
-       ".set\tnoat\n" \
-       "1:\tl" #m "\t$1,(%0)\n\t" \
-       "subu\t%1,1\n\t" \
-       "s" #m "\t$1,%4(%5)\n\t" \
-       "bne\t$0,%1,1b\n\t" \
-       "addiu\t%0,%6\n\t" \
-       ".set\tat\n\t" \
-       ".set\treorder"
-
-#define __OUTS(m,s,i) \
-__OUTS1(s) __OUTS2(m) \
-       : "=r" (addr), "=r" (count) \
-       : "0" (addr), "1" (count), "i" (0), "r" (mips_io_port_base+port), "I" (i) \
-       : "$1");} \
-__OUTS1(s##c) __OUTS2(m) \
-       : "=r" (addr), "=r" (count) \
-       : "0" (addr), "1" (count), "ir" (port), "r" (mips_io_port_base), "I" (i) \
-       : "$1");}
-
-__IN(unsigned char,b,b,8)
-__IN(unsigned short,h,w,16)
-__IN(unsigned int,w,l,32)
-
-__OUT(b,b,8)
-__OUT(h,w,16)
-__OUT(w,l,32)
-
-__INS(b,b,1)
-__INS(h,w,2)
-__INS(w,l,4)
-
-__OUTS(b,b,1)
-__OUTS(h,w,2)
-__OUTS(w,l,4)
+#define readq                          readq
+#define writeq                         writeq
+
+#define __BUILD_MEMORY_STRING(bwlq, type)                              \
+                                                                       \
+static inline void writes##bwlq(volatile void __iomem *mem,            \
+                               const void *addr, unsigned int count)   \
+{                                                                      \
+       const volatile type *__addr = addr;                             \
+                                                                       \
+       while (count--) {                                               \
+               __mem_write##bwlq(*__addr, mem);                        \
+               __addr++;                                               \
+       }                                                               \
+}                                                                      \
+                                                                       \
+static inline void reads##bwlq(volatile void __iomem *mem, void *addr, \
+                              unsigned int count)                      \
+{                                                                      \
+       volatile type *__addr = addr;                                   \
+                                                                       \
+       while (count--) {                                               \
+               *__addr = __mem_read##bwlq(mem);                        \
+               __addr++;                                               \
+       }                                                               \
+}
 
+#define __BUILD_IOPORT_STRING(bwlq, type)                              \
+                                                                       \
+static inline void outs##bwlq(unsigned long port, const void *addr,    \
+                             unsigned int count)                       \
+{                                                                      \
+       const volatile type *__addr = addr;                             \
+                                                                       \
+       while (count--) {                                               \
+               __mem_out##bwlq(*__addr, port);                         \
+               __addr++;                                               \
+       }                                                               \
+}                                                                      \
+                                                                       \
+static inline void ins##bwlq(unsigned long port, void *addr,           \
+                            unsigned int count)                        \
+{                                                                      \
+       volatile type *__addr = addr;                                   \
+                                                                       \
+       while (count--) {                                               \
+               *__addr = __mem_in##bwlq(port);                         \
+               __addr++;                                               \
+       }                                                               \
+}
 
-/*
- * Note that due to the way __builtin_constant_p() works, you
- *  - can't use it inside an inline function (it will never be true)
- *  - you don't have to worry about side effects within the __builtin..
- */
-#define outb(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outbc((val),(port)) : \
-       __outb((val),(port)))
-
-#define inb(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inbc(port) : \
-       __inb(port))
-
-#define outb_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outbc_p((val),(port)) : \
-       __outb_p((val),(port)))
-
-#define inb_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inbc_p(port) : \
-       __inb_p(port))
-
-#define outw(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outwc((val),(port)) : \
-       __outw((val),(port)))
-
-#define inw(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inwc(port) : \
-       __inw(port))
-
-#define outw_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outwc_p((val),(port)) : \
-       __outw_p((val),(port)))
-
-#define inw_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inwc_p(port) : \
-       __inw_p(port))
-
-#define outl(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outlc((val),(port)) : \
-       __outl((val),(port)))
-
-#define inl(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inlc(port) : \
-       __inl(port))
-
-#define outl_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outlc_p((val),(port)) : \
-       __outl_p((val),(port)))
-
-#define inl_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inlc_p(port) : \
-       __inl_p(port))
-
-
-#define outsb(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outsbc((port),(addr),(count)) : \
-       __outsb ((port),(addr),(count)))
-
-#define insb(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __insbc((port),(addr),(count)) : \
-       __insb((port),(addr),(count)))
-
-#define outsw(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outswc((port),(addr),(count)) : \
-       __outsw ((port),(addr),(count)))
-
-#define insw(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inswc((port),(addr),(count)) : \
-       __insw((port),(addr),(count)))
-
-#define outsl(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __outslc((port),(addr),(count)) : \
-       __outsl ((port),(addr),(count)))
-
-#define insl(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-       __inslc((port),(addr),(count)) : \
-       __insl((port),(addr),(count)))
+#define BUILDSTRING(bwlq, type)                                                \
+                                                                       \
+__BUILD_MEMORY_STRING(bwlq, type)                                      \
+__BUILD_IOPORT_STRING(bwlq, type)
 
-#define IO_SPACE_LIMIT 0xffff
+BUILDSTRING(b, u8)
+BUILDSTRING(w, u16)
+BUILDSTRING(l, u32)
+#ifdef CONFIG_64BIT
+BUILDSTRING(q, u64)
+#endif
 
-/*
- * The caches on some architectures aren't dma-coherent and have need to
- * handle this in software.  There are three types of operations that
- * can be applied to dma buffers.
- *
- *  - dma_cache_wback_inv(start, size) makes caches and coherent by
- *    writing the content of the caches back to memory, if necessary.
- *    The function also invalidates the affected part of the caches as
- *    necessary before DMA transfers from outside to memory.
- *  - dma_cache_wback(start, size) makes caches and coherent by
- *    writing the content of the caches back to memory, if necessary.
- *    The function also invalidates the affected part of the caches as
- *    necessary before DMA transfers from outside to memory.
- *  - dma_cache_inv(start, size) invalidates the affected parts of the
- *    caches.  Dirty lines of the caches may be written back or simply
- *    be discarded.  This operation is necessary before dma operations
- *    to the memory.
- */
-extern void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
-extern void (*_dma_cache_wback)(unsigned long start, unsigned long size);
-extern void (*_dma_cache_inv)(unsigned long start, unsigned long size);
 
-#define dma_cache_wback_inv(start,size)        _dma_cache_wback_inv(start,size)
-#define dma_cache_wback(start,size)    _dma_cache_wback(start,size)
-#define dma_cache_inv(start,size)      _dma_cache_inv(start,size)
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define mmiowb() wmb()
+#else
+/* Depends on MIPS II instruction set */
+#define mmiowb() asm volatile ("sync" ::: "memory")
+#endif
 
-static inline void sync(void)
+static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
 {
+       memset((void __force *)addr, val, count);
+}
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+       memcpy(dst, (void __force *)src, count);
+}
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+       memcpy((void __force *)dst, src, count);
 }
 
 /*
- * Given a physical address and a length, return a virtual address
- * that can be used to access the memory range with the caching
- * properties specified by "flags".
+ * Read a 32-bit register that requires a 64-bit read cycle on the bus.
+ * Avoid interrupt mucking, just adjust the address for 4-byte access.
+ * Assume the addresses are 8-byte aligned.
  */
-#define MAP_NOCACHE    (0)
-#define MAP_WRCOMBINE  (0)
-#define MAP_WRBACK     (0)
-#define MAP_WRTHROUGH  (0)
+#ifdef __MIPSEB__
+#define __CSR_32_ADJUST 4
+#else
+#define __CSR_32_ADJUST 0
+#endif
+
+#define csr_out32(v, a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST) = (v))
+#define csr_in32(a)    (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST))
+
+/*
+ * U-Boot specific
+ */
+#define sync()         mmiowb()
+
+#define MAP_NOCACHE    1
 
 static inline void *
 map_physmem(phys_addr_t paddr, unsigned long len, unsigned long flags)
 {
-       return (void *)paddr;
+       if (flags == MAP_NOCACHE)
+               return ioremap(paddr, len);
+
+       return (void *)CKSEG0ADDR(paddr);
+}
+#define map_physmem map_physmem
+
+#define __BUILD_CLRBITS(bwlq, sfx, end, type)                          \
+                                                                       \
+static inline void clrbits_##sfx(volatile void __iomem *mem, type clr) \
+{                                                                      \
+       type __val = __raw_read##bwlq(mem);                             \
+       __val = end##_to_cpu(__val);                                    \
+       __val &= ~clr;                                                  \
+       __val = cpu_to_##end(__val);                                    \
+       __raw_write##bwlq(__val, mem);                                  \
 }
 
-/*
- * Take down a mapping set up by map_physmem().
- */
-static inline void unmap_physmem(void *vaddr, unsigned long flags)
-{
+#define __BUILD_SETBITS(bwlq, sfx, end, type)                          \
+                                                                       \
+static inline void setbits_##sfx(volatile void __iomem *mem, type set) \
+{                                                                      \
+       type __val = __raw_read##bwlq(mem);                             \
+       __val = end##_to_cpu(__val);                                    \
+       __val |= set;                                                   \
+       __val = cpu_to_##end(__val);                                    \
+       __raw_write##bwlq(__val, mem);                                  \
+}
 
+#define __BUILD_CLRSETBITS(bwlq, sfx, end, type)                       \
+                                                                       \
+static inline void clrsetbits_##sfx(volatile void __iomem *mem,                \
+                                       type clr, type set)             \
+{                                                                      \
+       type __val = __raw_read##bwlq(mem);                             \
+       __val = end##_to_cpu(__val);                                    \
+       __val &= ~clr;                                                  \
+       __val |= set;                                                   \
+       __val = cpu_to_##end(__val);                                    \
+       __raw_write##bwlq(__val, mem);                                  \
 }
 
+#define BUILD_CLRSETBITS(bwlq, sfx, end, type)                         \
+                                                                       \
+__BUILD_CLRBITS(bwlq, sfx, end, type)                                  \
+__BUILD_SETBITS(bwlq, sfx, end, type)                                  \
+__BUILD_CLRSETBITS(bwlq, sfx, end, type)
+
+#define __to_cpu(v)            (v)
+#define cpu_to__(v)            (v)
+
+BUILD_CLRSETBITS(b, 8, _, u8)
+BUILD_CLRSETBITS(w, le16, le16, u16)
+BUILD_CLRSETBITS(w, be16, be16, u16)
+BUILD_CLRSETBITS(w, 16, _, u16)
+BUILD_CLRSETBITS(l, le32, le32, u32)
+BUILD_CLRSETBITS(l, be32, be32, u32)
+BUILD_CLRSETBITS(l, 32, _, u32)
+BUILD_CLRSETBITS(q, le64, le64, u64)
+BUILD_CLRSETBITS(q, be64, be64, u64)
+BUILD_CLRSETBITS(q, 64, _, u64)
+
+#include <asm-generic/io.h>
+
 #endif /* _ASM_IO_H */