--- /dev/null
+/******************************************************************************
+*
+* Copyright (C) 2010 - 2016 Xilinx, Inc. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* Use of the Software is limited solely to applications:
+* (a) running on a Xilinx device, or
+* (b) that interact with a Xilinx device through a bus or interconnect.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+* Except as contained in this notice, the name of the Xilinx shall not be used
+* in advertising or otherwise to promote the sale, use or other dealings in
+* this Software without prior written authorization from Xilinx.
+*
+******************************************************************************/
+/*****************************************************************************/
+/**
+* @file boot.S
+*
+* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
+* @{
+* <h2> boot.S </h2>
+* The boot code performs minimum configuration which is required for an
+* application to run starting from processor's reset state. Below is a
+* sequence illustrating what all configuration is performed before control
+* reaches to main function.
+*
+* 1. Program vector table base for exception handling
+* 2. Invalidate instruction cache, data cache and TLBs
+* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
+* abort, system)
+* 4. Configure MMU with short descriptor translation table format and program
+* base address of translation table
+* 5. Enable data cache, instruction cache and MMU
+* 6. Enable Floating point unit
+* 7. Transfer control to _start which clears BSS sections, initializes
+* global timer and runs global constructor before jumping to main
+* application
+*
+* <pre>
+* MODIFICATION HISTORY:
+*
+* Ver Who Date Changes
+* ----- ------- -------- ---------------------------------------------------
+* 1.00a ecm/sdm 10/20/09 Initial version
+* 3.06a sgd 05/15/12 Updated L2CC Auxiliary and Tag RAM Latency control
+* register settings.
+* 3.06a asa 06/17/12 Modified the TTBR settings and L2 Cache auxiliary
+* register settings.
+* 3.07a asa 07/16/12 Modified the L2 Cache controller settings to improve
+* performance. Changed the property of the ".boot"
+* section.
+* 3.07a sgd 08/21/12 Modified the L2 Cache controller and cp15 Aux Control
+* Register settings
+* 3.09a sgd 02/06/13 Updated SLCR l2c Ram Control register to a
+* value of 0x00020202. Fix for CR 697094 (SI#687034).
+* 3.10a srt 04/18/13 Implemented ARM Erratas. Please refer to file
+* 'xil_errata.h' for errata description
+* 4.2 pkp 06/19/14 Enabled asynchronous abort exception
+* 5.0 pkp 16/15/14 Modified initialization code to enable scu after
+* MMU is enabled
+* 5.1 pkp 05/13/15 Changed the initialization order so to first invalidate
+* caches and TLB, enable MMU and caches, then enable SMP
+* bit in ACTLR. L2Cache invalidation and enabling of L2Cache
+* is done later.
+* 5.4 asa 12/6/15 Added code to initialize SPSR for all relevant modes.
+* 6.0 mus 08/04/16 Added code to detect zynq-7000 base silicon configuration and
+* attempt to enable dual core behavior on single cpu zynq-7000s
+* devices is prevented from corrupting system behavior.
+* 6.0 mus 08/24/16 Check CPU core before putting cpu1 to reset for single core
+* zynq-7000s devices
+*
+* </pre>
+*
+* @note
+*
+* None.
+*
+******************************************************************************/
+
+#include "xparameters.h"
+#include "xil_errata.h"
+
+.globl MMUTable
+.global _prestart
+.global _boot
+.global __stack
+.global __irq_stack
+.global __supervisor_stack
+.global __abort_stack
+.global __fiq_stack
+.global __undef_stack
+.global _vector_table
+
+.set PSS_L2CC_BASE_ADDR, 0xF8F02000
+.set PSS_SLCR_BASE_ADDR, 0xF8000000
+
+.set RESERVED, 0x0fffff00
+.set TblBase , MMUTable
+.set LRemap, 0xFE00000F /* set the base address of the peripheral block as not shared */
+.set L2CCWay, (PSS_L2CC_BASE_ADDR + 0x077C) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
+.set L2CCSync, (PSS_L2CC_BASE_ADDR + 0x0730) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
+.set L2CCCrtl, (PSS_L2CC_BASE_ADDR + 0x0100) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
+.set L2CCAuxCrtl, (PSS_L2CC_BASE_ADDR + 0x0104) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
+.set L2CCTAGLatReg, (PSS_L2CC_BASE_ADDR + 0x0108) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
+.set L2CCDataLatReg, (PSS_L2CC_BASE_ADDR + 0x010C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
+.set L2CCIntClear, (PSS_L2CC_BASE_ADDR + 0x0220) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
+.set L2CCIntRaw, (PSS_L2CC_BASE_ADDR + 0x021C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/
+
+.set SLCRlockReg, (PSS_SLCR_BASE_ADDR + 0x04) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
+.set SLCRUnlockReg, (PSS_SLCR_BASE_ADDR + 0x08) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
+.set SLCRL2cRamReg, (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
+.set SLCRCPURSTReg, (0xF8000000 + 0x244) /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
+.set EFUSEStaus, (0xF800D000 + 0x10) /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/
+
+/* workaround for simulation not working when L1 D and I caches,MMU and L2 cache enabled - DT568997 */
+.if SIM_MODE == 1
+.set CRValMmuCac, 0b00000000000000 /* Disable IDC, and MMU */
+.else
+.set CRValMmuCac, 0b01000000000101 /* Enable IDC, and MMU */
+.endif
+
+.set CRValHiVectorAddr, 0b10000000000000 /* Set the Vector address to high, 0xFFFF0000 */
+
+.set L2CCAuxControl, 0x72360000 /* Enable all prefetching, Cache replacement policy, Parity enable,
+ Event monitor bus enable and Way Size (64 KB) */
+.set L2CCControl, 0x01 /* Enable L2CC */
+.set L2CCTAGLatency, 0x0111 /* latency for TAG RAM */
+.set L2CCDataLatency, 0x0121 /* latency for DATA RAM */
+
+.set SLCRlockKey, 0x767B /* SLCR lock key */
+.set SLCRUnlockKey, 0xDF0D /* SLCR unlock key */
+.set SLCRL2cRamConfig, 0x00020202 /* SLCR L2C ram configuration */
+
+/* Stack Pointer locations for boot code */
+.set Undef_stack, __undef_stack
+.set FIQ_stack, __fiq_stack
+.set Abort_stack, __abort_stack
+.set SPV_stack, __supervisor_stack
+.set IRQ_stack, __irq_stack
+.set SYS_stack, __stack
+
+.set vector_base, _vector_table
+
+.set FPEXC_EN, 0x40000000 /* FPU enable bit, (1 << 30) */
+
+.section .boot,"ax"
+
+
+/* this initializes the various processor modes */
+
+_prestart:
+_boot:
+
+#if XPAR_CPU_ID==0
+ /* only allow cpu0 through */
+ mrc p15,0,r1,c0,c0,5
+ and r1, r1, #0xf
+ cmp r1, #0
+ beq CheckEFUSE
+ EndlessLoop0:
+ wfe
+ b EndlessLoop0
+
+CheckEFUSE:
+ ldr r0,=EFUSEStaus
+ ldr r1,[r0] /* Read eFuse setting */
+ ands r1,r1,#0x80 /* Check whether device is having single core */
+ beq OKToRun
+
+ /* single core device, reset cpu1 */
+ ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
+ ldr r1,=SLCRUnlockKey /* set unlock key */
+ str r1, [r0] /* Unlock SLCR */
+
+ ldr r0,=SLCRCPURSTReg
+ ldr r1,[r0] /* Read CPU Software Reset Control register */
+ orr r1,r1,#0x22
+ str r1,[r0] /* Reset CPU1 */
+
+ ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
+ ldr r1,=SLCRlockKey /* set lock key */
+ str r1, [r0] /* lock SLCR */
+
+#elif XPAR_CPU_ID==1
+ /* only allow cpu1 through */
+ mrc p15,0,r1,c0,c0,5
+ and r1, r1, #0xf
+ cmp r1, #1
+ beq CheckEFUSE1
+ b EndlessLoop1
+
+CheckEFUSE1:
+ ldr r0,=EFUSEStaus
+ ldr r1,[r0] /* Read eFuse setting */
+ ands r1,r1,#0x80 /* Check whether device is having single core */
+ beq OKToRun
+ EndlessLoop1:
+ wfe
+ b EndlessLoop1
+#endif
+
+OKToRun:
+ mrc p15, 0, r0, c0, c0, 0 /* Get the revision */
+ and r5, r0, #0x00f00000
+ and r6, r0, #0x0000000f
+ orr r6, r6, r5, lsr #20-4
+
+#ifdef CONFIG_ARM_ERRATA_742230
+ cmp r6, #0x22 /* only present up to r2p2 */
+ mrcle p15, 0, r10, c15, c0, 1 /* read diagnostic register */
+ orrle r10, r10, #1 << 4 /* set bit #4 */
+ mcrle p15, 0, r10, c15, c0, 1 /* write diagnostic register */
+#endif
+
+#ifdef CONFIG_ARM_ERRATA_743622
+ teq r5, #0x00200000 /* only present in r2p* */
+ mrceq p15, 0, r10, c15, c0, 1 /* read diagnostic register */
+ orreq r10, r10, #1 << 6 /* set bit #6 */
+ mcreq p15, 0, r10, c15, c0, 1 /* write diagnostic register */
+#endif
+
+ /* set VBAR to the _vector_table address in linker script */
+ ldr r0, =vector_base
+ mcr p15, 0, r0, c12, c0, 0
+
+ /*invalidate scu*/
+ ldr r7, =0xf8f0000c
+ ldr r6, =0xffff
+ str r6, [r7]
+
+ /* Invalidate caches and TLBs */
+ mov r0,#0 /* r0 = 0 */
+ mcr p15, 0, r0, c8, c7, 0 /* invalidate TLBs */
+ mcr p15, 0, r0, c7, c5, 0 /* invalidate icache */
+ mcr p15, 0, r0, c7, c5, 6 /* Invalidate branch predictor array */
+ bl invalidate_dcache /* invalidate dcache */
+
+ /* Disable MMU, if enabled */
+ mrc p15, 0, r0, c1, c0, 0 /* read CP15 register 1 */
+ bic r0, r0, #0x1 /* clear bit 0 */
+ mcr p15, 0, r0, c1, c0, 0 /* write value back */
+
+#ifdef SHAREABLE_DDR
+ /* Mark the entire DDR memory as shareable */
+ ldr r3, =0x3ff /* 1024 entries to cover 1G DDR */
+ ldr r0, =TblBase /* MMU Table address in memory */
+ ldr r2, =0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
+shareable_loop:
+ str r2, [r0] /* write the entry to MMU table */
+ add r0, r0, #0x4 /* next entry in the table */
+ add r2, r2, #0x100000 /* next section */
+ subs r3, r3, #1
+ bge shareable_loop /* loop till 1G is covered */
+#endif
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the irq stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x12 /* IRQ mode */
+ msr cpsr, r2
+ ldr r13,=IRQ_stack /* IRQ stack pointer */
+ bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
+ msr spsr_fsxc,r2
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the supervisor stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x13 /* supervisor mode */
+ msr cpsr, r2
+ ldr r13,=SPV_stack /* Supervisor stack pointer */
+ bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
+ msr spsr_fsxc,r2
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the Abort stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x17 /* Abort mode */
+ msr cpsr, r2
+ ldr r13,=Abort_stack /* Abort stack pointer */
+ bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
+ msr spsr_fsxc,r2
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the FIQ stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x11 /* FIQ mode */
+ msr cpsr, r2
+ ldr r13,=FIQ_stack /* FIQ stack pointer */
+ bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
+ msr spsr_fsxc,r2
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the Undefine stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x1b /* Undefine mode */
+ msr cpsr, r2
+ ldr r13,=Undef_stack /* Undefine stack pointer */
+ bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
+ msr spsr_fsxc,r2
+
+ mrs r0, cpsr /* get the current PSR */
+ mvn r1, #0x1f /* set up the system stack pointer */
+ and r2, r1, r0
+ orr r2, r2, #0x1F /* SYS mode */
+ msr cpsr, r2
+ ldr r13,=SYS_stack /* SYS stack pointer */
+
+ /*set scu enable bit in scu*/
+ ldr r7, =0xf8f00000
+ ldr r0, [r7]
+ orr r0, r0, #0x1
+ str r0, [r7]
+
+ /* enable MMU and cache */
+
+ ldr r0,=TblBase /* Load MMU translation table base */
+ orr r0, r0, #0x5B /* Outer-cacheable, WB */
+ mcr 15, 0, r0, c2, c0, 0 /* TTB0 */
+
+ mvn r0,#0 /* Load MMU domains -- all ones=manager */
+ mcr p15,0,r0,c3,c0,0
+
+ /* Enable mmu, icahce and dcache */
+ ldr r0,=CRValMmuCac
+ mcr p15,0,r0,c1,c0,0 /* Enable cache and MMU */
+ dsb /* dsb allow the MMU to start up */
+ isb /* isb flush prefetch buffer */
+
+ /* Write to ACTLR */
+ mrc p15, 0, r0, c1, c0, 1 /* Read ACTLR*/
+ orr r0, r0, #(0x01 << 6) /* set SMP bit */
+ orr r0, r0, #(0x01 ) /* Cache/TLB maintenance broadcast */
+ mcr p15, 0, r0, c1, c0, 1 /* Write ACTLR*/
+
+/* Invalidate L2 Cache and enable L2 Cache*/
+/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
+#if USE_AMP!=1
+ ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
+ mov r1, #0 /* force the disable bit */
+ str r1, [r0] /* disable the L2 Caches */
+
+ ldr r0,=L2CCAuxCrtl /* Load L2CC base address base + Aux control register */
+ ldr r1,[r0] /* read the register */
+ ldr r2,=L2CCAuxControl /* set the default bits */
+ orr r1,r1,r2
+ str r1, [r0] /* store the Aux Control Register */
+
+ ldr r0,=L2CCTAGLatReg /* Load L2CC base address base + TAG Latency address */
+ ldr r1,=L2CCTAGLatency /* set the latencies for the TAG*/
+ str r1, [r0] /* store the TAG Latency register Register */
+
+ ldr r0,=L2CCDataLatReg /* Load L2CC base address base + Data Latency address */
+ ldr r1,=L2CCDataLatency /* set the latencies for the Data*/
+ str r1, [r0] /* store the Data Latency register Register */
+
+ ldr r0,=L2CCWay /* Load L2CC base address base + way register*/
+ ldr r2, =0xFFFF
+ str r2, [r0] /* force invalidate */
+
+ ldr r0,=L2CCSync /* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
+ /* Load L2CC base address base + sync register*/
+ /* poll for completion */
+Sync: ldr r1, [r0]
+ cmp r1, #0
+ bne Sync
+
+ ldr r0,=L2CCIntRaw /* clear pending interrupts */
+ ldr r1,[r0]
+ ldr r0,=L2CCIntClear
+ str r1,[r0]
+
+ ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
+ ldr r1,=SLCRUnlockKey /* set unlock key */
+ str r1, [r0] /* Unlock SLCR */
+
+ ldr r0,=SLCRL2cRamReg /* Load SLCR base address base + l2c Ram Control register */
+ ldr r1,=SLCRL2cRamConfig /* set the configuration value */
+ str r1, [r0] /* store the L2c Ram Control Register */
+
+ ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
+ ldr r1,=SLCRlockKey /* set lock key */
+ str r1, [r0] /* lock SLCR */
+
+ ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
+ ldr r1,[r0] /* read the register */
+ mov r2, #L2CCControl /* set the enable bit */
+ orr r1,r1,r2
+ str r1, [r0] /* enable the L2 Caches */
+#endif
+
+ mov r0, r0
+ mrc p15, 0, r1, c1, c0, 2 /* read cp access control register (CACR) into r1 */
+ orr r1, r1, #(0xf << 20) /* enable full access for p10 & p11 */
+ mcr p15, 0, r1, c1, c0, 2 /* write back into CACR */
+
+ /* enable vfp */
+ fmrx r1, FPEXC /* read the exception register */
+ orr r1,r1, #FPEXC_EN /* set VFP enable bit, leave the others in orig state */
+ fmxr FPEXC, r1 /* write back the exception register */
+
+ mrc p15,0,r0,c1,c0,0 /* flow prediction enable */
+ orr r0, r0, #(0x01 << 11) /* #0x8000 */
+ mcr p15,0,r0,c1,c0,0
+
+ mrc p15,0,r0,c1,c0,1 /* read Auxiliary Control Register */
+ orr r0, r0, #(0x1 << 2) /* enable Dside prefetch */
+ orr r0, r0, #(0x1 << 1) /* enable L2 Prefetch hint */
+ mcr p15,0,r0,c1,c0,1 /* write Auxiliary Control Register */
+
+ mrs r0, cpsr /* get the current PSR */
+ bic r0, r0, #0x100 /* enable asynchronous abort exception */
+ msr cpsr_xsf, r0
+
+
+ b _start /* jump to C startup code */
+ and r0, r0, r0 /* no op */
+
+.Ldone: b .Ldone /* Paranoia: we should never get here */
+
+
+/*
+ *************************************************************************
+ *
+ * invalidate_dcache - invalidate the entire d-cache by set/way
+ *
+ * Note: for Cortex-A9, there is no cp instruction for invalidating
+ * the whole D-cache. Need to invalidate each line.
+ *
+ *************************************************************************
+ */
+invalidate_dcache:
+ mrc p15, 1, r0, c0, c0, 1 /* read CLIDR */
+ ands r3, r0, #0x7000000
+ mov r3, r3, lsr #23 /* cache level value (naturally aligned) */
+ beq finished
+ mov r10, #0 /* start with level 0 */
+loop1:
+ add r2, r10, r10, lsr #1 /* work out 3xcachelevel */
+ mov r1, r0, lsr r2 /* bottom 3 bits are the Cache type for this level */
+ and r1, r1, #7 /* get those 3 bits alone */
+ cmp r1, #2
+ blt skip /* no cache or only instruction cache at this level */
+ mcr p15, 2, r10, c0, c0, 0 /* write the Cache Size selection register */
+ isb /* isb to sync the change to the CacheSizeID reg */
+ mrc p15, 1, r1, c0, c0, 0 /* reads current Cache Size ID register */
+ and r2, r1, #7 /* extract the line length field */
+ add r2, r2, #4 /* add 4 for the line length offset (log2 16 bytes) */
+ ldr r4, =0x3ff
+ ands r4, r4, r1, lsr #3 /* r4 is the max number on the way size (right aligned) */
+ clz r5, r4 /* r5 is the bit position of the way size increment */
+ ldr r7, =0x7fff
+ ands r7, r7, r1, lsr #13 /* r7 is the max number of the index size (right aligned) */
+loop2:
+ mov r9, r4 /* r9 working copy of the max way size (right aligned) */
+loop3:
+ orr r11, r10, r9, lsl r5 /* factor in the way number and cache number into r11 */
+ orr r11, r11, r7, lsl r2 /* factor in the index number */
+ mcr p15, 0, r11, c7, c6, 2 /* invalidate by set/way */
+ subs r9, r9, #1 /* decrement the way number */
+ bge loop3
+ subs r7, r7, #1 /* decrement the index */
+ bge loop2
+skip:
+ add r10, r10, #2 /* increment the cache number */
+ cmp r3, r10
+ bgt loop1
+
+finished:
+ mov r10, #0 /* swith back to cache level 0 */
+ mcr p15, 2, r10, c0, c0, 0 /* select current cache level in cssr */
+ dsb
+ isb
+
+ bx lr
+
+.end
+/**
+* @} End of "addtogroup a9_boot_code".
+*/
\ No newline at end of file