]> git.sur5r.net Git - openocd/blobdiff - src/target/cortex_a.c
target/cortex_a: fix temporary breakpoint during step
[openocd] / src / target / cortex_a.c
old mode 100755 (executable)
new mode 100644 (file)
index 8a1ca3b..e209c80
  *   Copyright (C) ST-Ericsson SA 2011                                     *
  *   michel.jaouen@stericsson.com : smp minimum support                    *
  *                                                                         *
+ *   Copyright (C) Broadcom 2012                                           *
+ *   ehunter@broadcom.com : Cortex-R4 support                              *
+ *                                                                         *
+ *   Copyright (C) 2013 Kamal Dasu                                         *
+ *   kdasu.kdev@gmail.com                                                  *
+ *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
- *   along with this program; if not, write to the                         *
- *   Free Software Foundation, Inc.,                                       *
- *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
  *                                                                         *
  *   Cortex-A8(tm) TRM, ARM DDI 0344H                                      *
  *   Cortex-A9(tm) TRM, ARM DDI 0407F                                      *
+ *   Cortex-A4(tm) TRM, ARM DDI 0363E                                      *
+ *   Cortex-A15(tm)TRM, ARM DDI 0438C                                      *
  *                                                                         *
  ***************************************************************************/
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 #include "target_request.h"
 #include "target_type.h"
 #include "arm_opcodes.h"
+#include "arm_semihosting.h"
+#include "transport/transport.h"
 #include <helper/time_support.h>
 
-static int cortex_a8_poll(struct target *target);
-static int cortex_a8_debug_entry(struct target *target);
-static int cortex_a8_restore_context(struct target *target, bool bpwp);
-static int cortex_a8_set_breakpoint(struct target *target,
-               struct breakpoint *breakpoint, uint8_t matchmode);
-static int cortex_a8_set_context_breakpoint(struct target *target,
-               struct breakpoint *breakpoint, uint8_t matchmode);
-static int cortex_a8_set_hybrid_breakpoint(struct target *target,
-               struct breakpoint *breakpoint);
-static int cortex_a8_unset_breakpoint(struct target *target,
-               struct breakpoint *breakpoint);
-static int cortex_a8_dap_read_coreregister_u32(struct target *target,
-               uint32_t *value, int regnum);
-static int cortex_a8_dap_write_coreregister_u32(struct target *target,
-               uint32_t value, int regnum);
-static int cortex_a8_mmu(struct target *target, int *enabled);
-static int cortex_a8_virt2phys(struct target *target,
-                uint32_t virt, uint32_t *phys);
+#define foreach_smp_target(pos, head) \
+       for (pos = head; (pos != NULL); pos = pos->next)
+
+static int cortex_a_poll(struct target *target);
+static int cortex_a_debug_entry(struct target *target);
+static int cortex_a_restore_context(struct target *target, bool bpwp);
+static int cortex_a_set_breakpoint(struct target *target,
+       struct breakpoint *breakpoint, uint8_t matchmode);
+static int cortex_a_set_context_breakpoint(struct target *target,
+       struct breakpoint *breakpoint, uint8_t matchmode);
+static int cortex_a_set_hybrid_breakpoint(struct target *target,
+       struct breakpoint *breakpoint);
+static int cortex_a_unset_breakpoint(struct target *target,
+       struct breakpoint *breakpoint);
+static int cortex_a_dap_read_coreregister_u32(struct target *target,
+       uint32_t *value, int regnum);
+static int cortex_a_dap_write_coreregister_u32(struct target *target,
+       uint32_t value, int regnum);
+static int cortex_a_mmu(struct target *target, int *enabled);
+static int cortex_a_mmu_modify(struct target *target, int enable);
+static int cortex_a_virt2phys(struct target *target,
+       target_addr_t virt, target_addr_t *phys);
+static int cortex_a_read_cpu_memory(struct target *target,
+       uint32_t address, uint32_t size, uint32_t count, uint8_t *buffer);
 
-/*
- * FIXME do topology discovery using the ROM; don't
- * assume this is an OMAP3.   Also, allow for multiple ARMv7-A
- * cores, with different AP numbering ... don't use a #define
- * for these numbers, use per-core armv7a state.
- */
-#define swjdp_memoryap 0
-#define swjdp_debugap 1
 
 /*  restore cp15_control_reg at resume */
-static int cortex_a8_restore_cp15_control_reg(struct target* target)
+static int cortex_a_restore_cp15_control_reg(struct target *target)
 {
        int retval = ERROR_OK;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-    struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = target_to_armv7a(target);
 
-       if (cortex_a8->cp15_control_reg !=cortex_a8->cp15_control_reg_curr)
-       {
-               cortex_a8->cp15_control_reg_curr = cortex_a8->cp15_control_reg;
-               //LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_a8->cp15_control_reg);
-               retval = armv7a->armv4_5_common.mcr(target, 15,
-                               0, 0,   /* op1, op2 */
-                               1, 0,   /* CRn, CRm */
-                               cortex_a8->cp15_control_reg);
+       if (cortex_a->cp15_control_reg != cortex_a->cp15_control_reg_curr) {
+               cortex_a->cp15_control_reg_curr = cortex_a->cp15_control_reg;
+               /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_a->cp15_control_reg); */
+               retval = armv7a->arm.mcr(target, 15,
+                               0, 0,   /* op1, op2 */
+                               1, 0,   /* CRn, CRm */
+                               cortex_a->cp15_control_reg);
        }
        return retval;
 }
 
-/*  check address before cortex_a8_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int cortex_a8_check_address(struct target *target, uint32_t address)
+/*
+ * Set up ARM core for memory access.
+ * If !phys_access, switch to SVC mode and make sure MMU is on
+ * If phys_access, switch off mmu
+ */
+static int cortex_a_prep_memaccess(struct target *target, int phys_access)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       uint32_t os_border = armv7a->armv7a_mmu.os_border;
-       if ((address < os_border) &&
-                       (armv7a->armv4_5_common.core_mode == ARM_MODE_SVC)){
-               LOG_ERROR("%x access in userspace and target in supervisor",address);
-               return ERROR_FAIL;
-       }
-       if ((address >= os_border) &&
-                       ( cortex_a8->curr_mode != ARM_MODE_SVC)){
-        dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
-        cortex_a8->curr_mode = ARM_MODE_SVC;
-               LOG_INFO("%x access in kernel space and target not in supervisor",
-                               address);
-               return ERROR_OK;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       int mmu_enabled = 0;
+
+       if (phys_access == 0) {
+               dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 1);
+               if (cortex_a->dacrfixup_mode == CORTEX_A_DACRFIXUP_ON) {
+                       /* overwrite DACR to all-manager */
+                       armv7a->arm.mcr(target, 15,
+                                       0, 0, 3, 0,
+                                       0xFFFFFFFF);
+               }
+       } else {
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 0);
        }
-       if ((address < os_border) &&
-                       (cortex_a8->curr_mode == ARM_MODE_SVC)){
-        dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
-     cortex_a8->curr_mode = ARM_MODE_ANY;
+       return ERROR_OK;
+}
+
+/*
+ * Restore ARM core after memory access.
+ * If !phys_access, switch to previous mode
+ * If phys_access, restore MMU setting
+ */
+static int cortex_a_post_memaccess(struct target *target, int phys_access)
+{
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       if (phys_access == 0) {
+               if (cortex_a->dacrfixup_mode == CORTEX_A_DACRFIXUP_ON) {
+                       /* restore */
+                       armv7a->arm.mcr(target, 15,
+                                       0, 0, 3, 0,
+                                       cortex_a->cp15_dacr_reg);
+               }
+               dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
+       } else {
+               int mmu_enabled = 0;
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 1);
        }
        return ERROR_OK;
 }
+
+
 /*  modify cp15_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
-static int cortex_a8_mmu_modify(struct target *target, int enable)
+static int cortex_a_mmu_modify(struct target *target, int enable)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
        int retval = ERROR_OK;
-       if (enable)
-       {
+       int need_write = 0;
+
+       if (enable) {
                /*  if mmu enabled at target stop and mmu not enable */
-               if (!(cortex_a8->cp15_control_reg & 0x1U))
-               {
+               if (!(cortex_a->cp15_control_reg & 0x1U)) {
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(cortex_a8->cp15_control_reg_curr & 0x1U))
-               {
-                       cortex_a8->cp15_control_reg_curr |= 0x1U;
-                       retval = armv7a->armv4_5_common.mcr(target, 15,
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       cortex_a8->cp15_control_reg_curr);
-               }
-       }
-       else
-       {
-               if (cortex_a8->cp15_control_reg_curr & 0x4U)
-               {
-                   /*  data cache is active */
-                       cortex_a8->cp15_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
-                       if (armv7a->armv7a_mmu.armv7a_cache.flush_all_data_cache)
-                       armv7a->armv7a_mmu.armv7a_cache.flush_all_data_cache(target);
+               if ((cortex_a->cp15_control_reg_curr & 0x1U) == 0) {
+                       cortex_a->cp15_control_reg_curr |= 0x1U;
+                       need_write = 1;
                }
-               if ( (cortex_a8->cp15_control_reg_curr & 0x1U))
-               {
-                       cortex_a8->cp15_control_reg_curr &= ~0x1U;
-                       retval = armv7a->armv4_5_common.mcr(target, 15,
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       cortex_a8->cp15_control_reg_curr);
+       } else {
+               if ((cortex_a->cp15_control_reg_curr & 0x1U) == 0x1U) {
+                       cortex_a->cp15_control_reg_curr &= ~0x1U;
+                       need_write = 1;
                }
        }
+
+       if (need_write) {
+               LOG_DEBUG("%s, writing cp15 ctrl: %" PRIx32,
+                       enable ? "enable mmu" : "disable mmu",
+                       cortex_a->cp15_control_reg_curr);
+
+               retval = armv7a->arm.mcr(target, 15,
+                               0, 0,   /* op1, op2 */
+                               1, 0,   /* CRn, CRm */
+                               cortex_a->cp15_control_reg_curr);
+       }
        return retval;
 }
 
 /*
- * Cortex-A8 Basic debug access, very low level assumes state is saved
+ * Cortex-A Basic debug access, very low level assumes state is saved
  */
-static int cortex_a8_init_debug_access(struct target *target)
+static int cortex_a_init_debug_access(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
        int retval;
-       uint32_t dummy;
 
-       LOG_DEBUG(" ");
+       /* lock memory-mapped access to debug registers to prevent
+        * software interference */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_LOCKACCESS, 0);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* Unlocking the debug registers for modification */
-       /* The debugport might be uninitialised so try twice */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
+       /* Disable cacheline fills and force cache write-through in debug state */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCCR, 0);
        if (retval != ERROR_OK)
-       {
-               /* try again */
-               retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-               {
-                       LOG_USER("Locking debug access failed on first, but succeeded on second try.");
-               }
-       }
+               return retval;
+
+       /* Disable TLB lookup and refill/eviction in debug state */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSMCR, 0);
        if (retval != ERROR_OK)
                return retval;
-       /* Clear Sticky Power Down status Bit in PRSR to enable access to
-          the registers in the Core Power Domain */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_PRSR, &dummy);
+
+       retval = dap_run(armv7a->debug_ap->dap);
        if (retval != ERROR_OK)
                return retval;
 
@@ -209,7 +235,30 @@ static int cortex_a8_init_debug_access(struct target *target)
        /* Resync breakpoint registers */
 
        /* Since this is likely called from init or reset, update target state information*/
-       return cortex_a8_poll(target);
+       return cortex_a_poll(target);
+}
+
+static int cortex_a_wait_instrcmpl(struct target *target, uint32_t *dscr, bool force)
+{
+       /* Waits until InstrCmpl_l becomes 1, indicating instruction is done.
+        * Writes final value of DSCR into *dscr. Pass force to force always
+        * reading DSCR at least once. */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       int64_t then = timeval_ms();
+       while ((*dscr & DSCR_INSTR_COMP) == 0 || force) {
+               force = false;
+               int retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, dscr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Could not read DSCR register");
+                       return retval;
+               }
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for InstrCompl=1");
+                       return ERROR_FAIL;
+               }
+       }
+       return ERROR_OK;
 }
 
 /* To reduce needless round-trips, pass in a pointer to the current
@@ -217,58 +266,40 @@ static int cortex_a8_init_debug_access(struct target *target)
  * value on return from this function; or DSCR_INSTR_COMP if you
  * happen to know that no instruction is pending.
  */
-static int cortex_a8_exec_opcode(struct target *target,
-               uint32_t opcode, uint32_t *dscr_p)
+static int cortex_a_exec_opcode(struct target *target,
+       uint32_t opcode, uint32_t *dscr_p)
 {
        uint32_t dscr;
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
        dscr = dscr_p ? *dscr_p : 0;
 
        LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
 
        /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_INSTR_COMP) == 0)
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-               {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000)
-               {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       retval = cortex_a_wait_instrcmpl(target, dscr_p, false);
+       if (retval != ERROR_OK)
+               return retval;
 
-       retval = mem_ap_sel_write_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_ITR, opcode);
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_ITR, opcode);
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
-       do
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+       int64_t then = timeval_ms();
+       do {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-               {
+               if (retval != ERROR_OK) {
                        LOG_ERROR("Could not read DSCR register");
                        return retval;
                }
-               if (timeval_ms() > then + 1000)
-               {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for cortex_a_exec_opcode");
                        return ERROR_FAIL;
                }
-       }
-       while ((dscr & DSCR_INSTR_COMP) == 0); /* Wait for InstrCompl bit to be set */
+       } while ((dscr & DSCR_INSTR_COMP) == 0);        /* Wait for InstrCompl bit to be set */
 
        if (dscr_p)
                *dscr_p = dscr;
@@ -280,71 +311,64 @@ static int cortex_a8_exec_opcode(struct target *target,
 Read core register with very few exec_opcode, fast but needs work_area.
 This can cause problems with MMU active.
 **************************************************************************/
-static int cortex_a8_read_regs_through_mem(struct target *target, uint32_t address,
-               uint32_t * regfile)
+static int cortex_a_read_regs_through_mem(struct target *target, uint32_t address,
+       uint32_t *regfile)
 {
        int retval = ERROR_OK;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
-       retval = cortex_a8_dap_read_coreregister_u32(target, regfile, 0);
+       retval = cortex_a_dap_read_coreregister_u32(target, regfile, 0);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_coreregister_u32(target, address, 0);
+       retval = cortex_a_dap_write_coreregister_u32(target, address, 0);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
+       retval = cortex_a_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_sel_read_buf_u32(swjdp, swjdp_memoryap,
-                       (uint8_t *)(&regfile[1]), 4*15, address);
+       retval = mem_ap_read_buf(armv7a->memory_ap,
+                       (uint8_t *)(&regfile[1]), 415, address);
 
        return retval;
 }
 
-static int cortex_a8_dap_read_coreregister_u32(struct target *target,
-               uint32_t *value, int regnum)
+static int cortex_a_dap_read_coreregister_u32(struct target *target,
+       uint32_t *value, int regnum)
 {
        int retval = ERROR_OK;
        uint8_t reg = regnum&0xFF;
        uint32_t dscr = 0;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
        if (reg > 17)
                return retval;
 
-       if (reg < 15)
-       {
+       if (reg < 15) {
                /* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0"  0xEE00nE15 */
-               retval = cortex_a8_exec_opcode(target,
+               retval = cortex_a_exec_opcode(target,
                                ARMV4_5_MCR(14, 0, reg, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
-       }
-       else if (reg == 15)
-       {
+       } else if (reg == 15) {
                /* "MOV r0, r15"; then move r0 to DCCTX */
-               retval = cortex_a8_exec_opcode(target, 0xE1A0000F, &dscr);
+               retval = cortex_a_exec_opcode(target, 0xE1A0000F, &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_exec_opcode(target,
+               retval = cortex_a_exec_opcode(target,
                                ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
-       }
-       else
-       {
+       } else {
                /* "MRS r0, CPSR" or "MRS r0, SPSR"
                 * then move r0 to DCCTX
                 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_exec_opcode(target,
+               retval = cortex_a_exec_opcode(target,
                                ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
@@ -352,48 +376,44 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
        }
 
        /* Wait for DTRRXfull then read DTRRTX */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0)
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+       int64_t then = timeval_ms();
+       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               if (timeval_ms() > then + 1000)
-               {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for cortex_a_exec_opcode");
                        return ERROR_FAIL;
                }
        }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DTRTX, value);
        LOG_DEBUG("read DCC 0x%08" PRIx32, *value);
 
        return retval;
 }
 
-static int cortex_a8_dap_write_coreregister_u32(struct target *target,
-               uint32_t value, int regnum)
+static int cortex_a_dap_write_coreregister_u32(struct target *target,
+       uint32_t value, int regnum)
 {
        int retval = ERROR_OK;
        uint8_t Rd = regnum&0xFF;
        uint32_t dscr;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
        LOG_DEBUG("register %i, value 0x%08" PRIx32, regnum, value);
 
        /* Check that DCCRX is not full */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
-       if (dscr & DSCR_DTR_RX_FULL)
-       {
+       if (dscr & DSCR_DTR_RX_FULL) {
                LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
                /* Clear DCCRX with MRC(p14, 0, Rd, c0, c5, 0), opcode  0xEE100E15 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
@@ -404,51 +424,45 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
 
        /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
        LOG_DEBUG("write DCC 0x%08" PRIx32, value);
-       retval = mem_ap_sel_write_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_write_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DTRRX, value);
        if (retval != ERROR_OK)
                return retval;
 
-       if (Rd < 15)
-       {
+       if (Rd < 15) {
                /* DCCRX to Rn, "MRC p14, 0, Rn, c0, c5, 0", 0xEE10nE15 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
                                &dscr);
-       
+
                if (retval != ERROR_OK)
                        return retval;
-       }
-       else if (Rd == 15)
-       {
+       } else if (Rd == 15) {
                /* DCCRX to R0, "MRC p14, 0, R0, c0, c5, 0", 0xEE100E15
                 * then "mov r15, r0"
                 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_exec_opcode(target, 0xE1A0F000, &dscr);
+               retval = cortex_a_exec_opcode(target, 0xE1A0F000, &dscr);
                if (retval != ERROR_OK)
                        return retval;
-       }
-       else
-       {
+       } else {
                /* DCCRX to R0, "MRC p14, 0, R0, c0, c5, 0", 0xEE100E15
                 * then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields)
                 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
 
                /* "Prefetch flush" after modifying execution status in CPSR */
-               if (Rd == 16)
-               {
-                       retval = cortex_a8_exec_opcode(target,
+               if (Rd == 16) {
+                       retval = cortex_a_exec_opcode(target,
                                        ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
                                        &dscr);
                        if (retval != ERROR_OK)
@@ -460,19 +474,20 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
-static int cortex_a8_dap_write_memap_register_u32(struct target *target, uint32_t address, uint32_t value)
+static int cortex_a_dap_write_memap_register_u32(struct target *target,
+       uint32_t address,
+       uint32_t value)
 {
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap, address, value);
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap, address, value);
 
        return retval;
 }
 
 /*
- * Cortex-A8 implementation of Debug Programmer's Model
+ * Cortex-A implementation of Debug Programmer's Model
  *
  * NOTE the invariant:  these routines return with DSCR_INSTR_COMP set,
  * so there's no need to poll for it before executing an instruction.
@@ -482,22 +497,21 @@ static int cortex_a8_dap_write_memap_register_u32(struct target *target, uint32_
  * be the places to enable/disable that mode.
  */
 
-static inline struct cortex_a8_common *dpm_to_a8(struct arm_dpm *dpm)
+static inline struct cortex_a_common *dpm_to_a(struct arm_dpm *dpm)
 {
-       return container_of(dpm, struct cortex_a8_common, armv7a_common.dpm);
+       return container_of(dpm, struct cortex_a_common, armv7a_common.dpm);
 }
 
-static int cortex_a8_write_dcc(struct cortex_a8_common *a8, uint32_t data)
+static int cortex_a_write_dcc(struct cortex_a_common *a, uint32_t data)
 {
        LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_sel_write_u32(a8->armv7a_common.armv4_5_common.dap,
-                       swjdp_debugap,a8->armv7a_common.debug_base + CPUDBG_DTRRX, data);
+       return mem_ap_write_u32(a->armv7a_common.debug_ap,
+                       a->armv7a_common.debug_base + CPUDBG_DTRRX, data);
 }
 
-static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
-               uint32_t *dscr_p)
+static int cortex_a_read_dcc(struct cortex_a_common *a, uint32_t *data,
+       uint32_t *dscr_p)
 {
-       struct adiv5_dap *swjdp = a8->armv7a_common.armv4_5_common.dap;
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
@@ -505,25 +519,24 @@ static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
                dscr = *dscr_p;
 
        /* Wait for DTRRXfull */
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               a8->armv7a_common.debug_base + CPUDBG_DSCR,
+               retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                               a->armv7a_common.debug_base + CPUDBG_DSCR,
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
-               if (timeval_ms() > then + 1000)
-               {
+               if (timeval_ms() > then + 1000) {
                        LOG_ERROR("Timeout waiting for read dcc");
                        return ERROR_FAIL;
                }
        }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       a8->armv7a_common.debug_base + CPUDBG_DTRTX, data);
+       retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                       a->armv7a_common.debug_base + CPUDBG_DTRTX, data);
        if (retval != ERROR_OK)
                return retval;
-       //LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
+       /* LOG_DEBUG("read DCC 0x%08" PRIx32, *data); */
 
        if (dscr_p)
                *dscr_p = dscr;
@@ -531,26 +544,23 @@ static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
        return retval;
 }
 
-static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
+static int cortex_a_dpm_prepare(struct arm_dpm *dpm)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
-       struct adiv5_dap *swjdp = a8->armv7a_common.armv4_5_common.dap;
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr;
        int retval;
 
        /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
-       for (;;)
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               a8->armv7a_common.debug_base + CPUDBG_DSCR,
+       int64_t then = timeval_ms();
+       for (;; ) {
+               retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                               a->armv7a_common.debug_base + CPUDBG_DSCR,
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
                if ((dscr & DSCR_INSTR_COMP) != 0)
                        break;
-               if (timeval_ms() > then + 1000)
-               {
+               if (timeval_ms() > then + 1000) {
                        LOG_ERROR("Timeout waiting for dpm prepare");
                        return ERROR_FAIL;
                }
@@ -560,8 +570,8 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
        if (dscr & DSCR_DTR_RX_FULL) {
                LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
                /* Clear DCCRX */
-               retval = cortex_a8_exec_opcode(
-                               a8->armv7a_common.armv4_5_common.target,
+               retval = cortex_a_exec_opcode(
+                               a->armv7a_common.arm.target,
                                ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
@@ -571,193 +581,193 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
        return retval;
 }
 
-static int cortex_a8_dpm_finish(struct arm_dpm *dpm)
+static int cortex_a_dpm_finish(struct arm_dpm *dpm)
 {
        /* REVISIT what could be done here? */
        return ERROR_OK;
 }
 
-static int cortex_a8_instr_write_data_dcc(struct arm_dpm *dpm,
-               uint32_t opcode, uint32_t data)
+static int cortex_a_instr_write_data_dcc(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        int retval;
        uint32_t dscr = DSCR_INSTR_COMP;
 
-       retval = cortex_a8_write_dcc(a8, data);
+       retval = cortex_a_write_dcc(a, data);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       return cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
 }
 
-static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
-               uint32_t opcode, uint32_t data)
+static int cortex_a_instr_write_data_r0(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
-       retval = cortex_a8_write_dcc(a8, data);
+       retval = cortex_a_write_dcc(a, data);
        if (retval != ERROR_OK)
                return retval;
 
        /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* then the opcode, taking data from R0 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
 
        return retval;
 }
 
-static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm)
+static int cortex_a_instr_cpsr_sync(struct arm_dpm *dpm)
 {
        struct target *target = dpm->arm->target;
        uint32_t dscr = DSCR_INSTR_COMP;
 
        /* "Prefetch flush" after modifying execution status in CPSR */
-       return cortex_a8_exec_opcode(target,
+       return cortex_a_exec_opcode(target,
                        ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
                        &dscr);
 }
 
-static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
-               uint32_t opcode, uint32_t *data)
+static int cortex_a_instr_read_data_dcc(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t *data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        int retval;
        uint32_t dscr = DSCR_INSTR_COMP;
 
        /* the opcode, writing data to DCC */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_read_dcc(a8, data, &dscr);
+       return cortex_a_read_dcc(a, data, &dscr);
 }
 
 
-static int cortex_a8_instr_read_data_r0(struct arm_dpm *dpm,
-               uint32_t opcode, uint32_t *data)
+static int cortex_a_instr_read_data_r0(struct arm_dpm *dpm,
+       uint32_t opcode, uint32_t *data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
        /* the opcode, writing data to R0 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* write R0 to DCC */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.armv4_5_common.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_read_dcc(a8, data, &dscr);
+       return cortex_a_read_dcc(a, data, &dscr);
 }
 
-static int cortex_a8_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
-               uint32_t addr, uint32_t control)
+static int cortex_a_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
+       uint32_t addr, uint32_t control)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv7a_common.debug_base;
-       uint32_t cr = a8->armv7a_common.debug_base;
+       struct cortex_a_common *a = dpm_to_a(dpm);
+       uint32_t vr = a->armv7a_common.debug_base;
+       uint32_t cr = a->armv7a_common.debug_base;
        int retval;
 
        switch (index_t) {
-       case 0 ... 15:          /* breakpoints */
-               vr += CPUDBG_BVR_BASE;
-               cr += CPUDBG_BCR_BASE;
-               break;
-       case 16 ... 31:         /* watchpoints */
-               vr += CPUDBG_WVR_BASE;
-               cr += CPUDBG_WCR_BASE;
-               index_t -= 16;
-               break;
-       default:
-               return ERROR_FAIL;
+               case 0 ... 15:  /* breakpoints */
+                       vr += CPUDBG_BVR_BASE;
+                       cr += CPUDBG_BCR_BASE;
+                       break;
+               case 16 ... 31: /* watchpoints */
+                       vr += CPUDBG_WVR_BASE;
+                       cr += CPUDBG_WCR_BASE;
+                       index_t -= 16;
+                       break;
+               default:
+                       return ERROR_FAIL;
        }
        vr += 4 * index_t;
        cr += 4 * index_t;
 
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
-                       (unsigned) vr, (unsigned) cr);
+       LOG_DEBUG("A: bpwp enable, vr %08x cr %08x",
+               (unsigned) vr, (unsigned) cr);
 
-       retval = cortex_a8_dap_write_memap_register_u32(dpm->arm->target,
+       retval = cortex_a_dap_write_memap_register_u32(dpm->arm->target,
                        vr, addr);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(dpm->arm->target,
+       retval = cortex_a_dap_write_memap_register_u32(dpm->arm->target,
                        cr, control);
        return retval;
 }
 
-static int cortex_a8_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
+static int cortex_a_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t cr;
 
        switch (index_t) {
-       case 0 ... 15:
-               cr = a8->armv7a_common.debug_base + CPUDBG_BCR_BASE;
-               break;
-       case 16 ... 31:
-               cr = a8->armv7a_common.debug_base + CPUDBG_WCR_BASE;
-               index_t -= 16;
-               break;
-       default:
-               return ERROR_FAIL;
+               case 0 ... 15:
+                       cr = a->armv7a_common.debug_base + CPUDBG_BCR_BASE;
+                       break;
+               case 16 ... 31:
+                       cr = a->armv7a_common.debug_base + CPUDBG_WCR_BASE;
+                       index_t -= 16;
+                       break;
+               default:
+                       return ERROR_FAIL;
        }
        cr += 4 * index_t;
 
-       LOG_DEBUG("A8: bpwp disable, cr %08x", (unsigned) cr);
+       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
 
        /* clear control register */
-       return cortex_a8_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
+       return cortex_a_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
 }
 
-static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)
+static int cortex_a_dpm_setup(struct cortex_a_common *a, uint32_t didr)
 {
-       struct arm_dpm *dpm = &a8->armv7a_common.dpm;
+       struct arm_dpm *dpm = &a->armv7a_common.dpm;
        int retval;
 
-       dpm->arm = &a8->armv7a_common.armv4_5_common;
+       dpm->arm = &a->armv7a_common.arm;
        dpm->didr = didr;
 
-       dpm->prepare = cortex_a8_dpm_prepare;
-       dpm->finish = cortex_a8_dpm_finish;
+       dpm->prepare = cortex_a_dpm_prepare;
+       dpm->finish = cortex_a_dpm_finish;
 
-       dpm->instr_write_data_dcc = cortex_a8_instr_write_data_dcc;
-       dpm->instr_write_data_r0 = cortex_a8_instr_write_data_r0;
-       dpm->instr_cpsr_sync = cortex_a8_instr_cpsr_sync;
+       dpm->instr_write_data_dcc = cortex_a_instr_write_data_dcc;
+       dpm->instr_write_data_r0 = cortex_a_instr_write_data_r0;
+       dpm->instr_cpsr_sync = cortex_a_instr_cpsr_sync;
 
-       dpm->instr_read_data_dcc = cortex_a8_instr_read_data_dcc;
-       dpm->instr_read_data_r0 = cortex_a8_instr_read_data_r0;
+       dpm->instr_read_data_dcc = cortex_a_instr_read_data_dcc;
+       dpm->instr_read_data_r0 = cortex_a_instr_read_data_r0;
 
-       dpm->bpwp_enable = cortex_a8_bpwp_enable;
-       dpm->bpwp_disable = cortex_a8_bpwp_disable;
+       dpm->bpwp_enable = cortex_a_bpwp_enable;
+       dpm->bpwp_disable = cortex_a_bpwp_disable;
 
        retval = arm_dpm_setup(dpm);
        if (retval == ERROR_OK)
@@ -765,38 +775,33 @@ static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)
 
        return retval;
 }
-static struct target *get_cortex_a8(struct target *target, int32_t coreid)
+static struct target *get_cortex_a(struct target *target, int32_t coreid)
 {
-struct target_list *head;
-struct target *curr;
+       struct target_list *head;
+       struct target *curr;
 
        head = target->head;
-       while(head != (struct target_list*)NULL)
-       {
+       while (head != (struct target_list *)NULL) {
                curr = head->target;
                if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-               {
-        return curr;
-               }
+                       return curr;
                head = head->next;
        }
-   return target;
+       return target;
 }
-static int cortex_a8_halt(struct target *target);
+static int cortex_a_halt(struct target *target);
 
-static int cortex_a8_halt_smp(struct target *target)
+static int cortex_a_halt_smp(struct target *target)
 {
        int retval = 0;
        struct target_list *head;
        struct target *curr;
        head = target->head;
-       while(head != (struct target_list*)NULL)
-       {
+       while (head != (struct target_list *)NULL) {
                curr = head->target;
-               if ((curr != target) && (curr->state!= TARGET_HALTED))
-               {
-                       retval += cortex_a8_halt(curr);
-               }
+               if ((curr != target) && (curr->state != TARGET_HALTED)
+                       && target_was_examined(curr))
+                       retval += cortex_a_halt(curr);
                head = head->next;
        }
        return retval;
@@ -804,116 +809,131 @@ static int cortex_a8_halt_smp(struct target *target)
 
 static int update_halt_gdb(struct target *target)
 {
+       struct target *gdb_target = NULL;
+       struct target_list *head;
+       struct target *curr;
        int retval = 0;
-       if (target->gdb_service->core[0]==-1)
-       {
+
+       if (target->gdb_service && target->gdb_service->core[0] == -1) {
                target->gdb_service->target = target;
                target->gdb_service->core[0] = target->coreid;
-               retval += cortex_a8_halt_smp(target);
+               retval += cortex_a_halt_smp(target);
        }
+
+       if (target->gdb_service)
+               gdb_target = target->gdb_service->target;
+
+       foreach_smp_target(head, target->head) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* Skip gdb_target; it alerts GDB so has to be polled as last one */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in cortex_a_poll() */
+               curr->smp = 0;
+               cortex_a_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               cortex_a_poll(gdb_target);
        return retval;
 }
 
 /*
- * Cortex-A8 Run control
+ * Cortex-A Run control
  */
 
-static int cortex_a8_poll(struct target *target)
+static int cortex_a_poll(struct target *target)
 {
        int retval = ERROR_OK;
        uint32_t dscr;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
        enum target_state prev_target_state = target->state;
-       //  toggle to another core is done by gdb as follow
-       //  maint packet J core_id
-       //  continue
-       //  the next polling trigger an halt event sent to gdb
+       /*  toggle to another core is done by gdb as follow */
+       /*  maint packet J core_id */
+       /*  continue */
+       /*  the next polling trigger an halt event sent to gdb */
        if ((target->state == TARGET_HALTED) && (target->smp) &&
-                       (target->gdb_service) &&
-                       (target->gdb_service->target==NULL) )
-       {
+               (target->gdb_service) &&
+               (target->gdb_service->target == NULL)) {
                target->gdb_service->target =
-                       get_cortex_a8(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target,
-                               TARGET_EVENT_HALTED);
+                       get_cortex_a(target, target->gdb_service->core[1]);
+               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
                return retval;
        }
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-       {
                return retval;
-       }
-       cortex_a8->cpudbg_dscr = dscr;
+       cortex_a->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED))
-       {
-               if (prev_target_state != TARGET_HALTED)
-               {
+       if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) {
+               if (prev_target_state != TARGET_HALTED) {
                        /* We have a halting debug event */
                        LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
                        if ((prev_target_state == TARGET_RUNNING)
-                                       || (prev_target_state == TARGET_RESET))
-                       {
-                               retval = cortex_a8_debug_entry(target);
+                               || (prev_target_state == TARGET_UNKNOWN)
+                               || (prev_target_state == TARGET_RESET)) {
+                               retval = cortex_a_debug_entry(target);
                                if (retval != ERROR_OK)
                                        return retval;
-                               if (target->smp)
-                               {
+                               if (target->smp) {
                                        retval = update_halt_gdb(target);
                                        if (retval != ERROR_OK)
                                                return retval;
                                }
+
+                               if (arm_semihosting(target, &retval) != 0)
+                                       return retval;
+
                                target_call_event_callbacks(target,
-                                               TARGET_EVENT_HALTED);
+                                       TARGET_EVENT_HALTED);
                        }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING)
-                       {
+                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
                                LOG_DEBUG(" ");
 
-                               retval = cortex_a8_debug_entry(target);
+                               retval = cortex_a_debug_entry(target);
                                if (retval != ERROR_OK)
                                        return retval;
-                               if (target->smp)
-                               {
+                               if (target->smp) {
                                        retval = update_halt_gdb(target);
                                        if (retval != ERROR_OK)
                                                return retval;
                                }
 
                                target_call_event_callbacks(target,
-                                               TARGET_EVENT_DEBUG_HALTED);
+                                       TARGET_EVENT_DEBUG_HALTED);
                        }
                }
-       }
-       else if (DSCR_RUN_MODE(dscr) == DSCR_CORE_RESTARTED)
-       {
+       } else
                target->state = TARGET_RUNNING;
-       }
-       else
-       {
-               LOG_DEBUG("Unknown target state dscr = 0x%08" PRIx32, dscr);
-               target->state = TARGET_UNKNOWN;
-       }
 
        return retval;
 }
 
-static int cortex_a8_halt(struct target *target)
+static int cortex_a_halt(struct target *target)
 {
        int retval = ERROR_OK;
        uint32_t dscr;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
 
        /*
         * Tell the core to be halted by writing DRCR with 0x1
         * and then wait for the core to be halted.
         */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DRCR, DRCR_HALT);
        if (retval != ERROR_OK)
                return retval;
@@ -921,29 +941,25 @@ static int cortex_a8_halt(struct target *target)
        /*
         * enter halting debug mode
         */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
-               armv7a->debug_base + CPUDBG_DSCR, dscr | DSCR_HALT_DBG_MODE);
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, dscr | DSCR_HALT_DBG_MODE);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       for (;;)
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       int64_t then = timeval_ms();
+       for (;; ) {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
                if ((dscr & DSCR_CORE_HALTED) != 0)
-               {
                        break;
-               }
-               if (timeval_ms() > then + 1000)
-               {
+               if (timeval_ms() > then + 1000) {
                        LOG_ERROR("Timeout waiting for halt");
                        return ERROR_FAIL;
                }
@@ -954,11 +970,11 @@ static int cortex_a8_halt(struct target *target)
        return ERROR_OK;
 }
 
-static int cortex_a8_internal_restore(struct target *target, int current,
-               uint32_t *address, int handle_breakpoints, int debug_execution)
+static int cortex_a_internal_restore(struct target *target, int current,
+       target_addr_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
+       struct arm *arm = &armv7a->arm;
        int retval;
        uint32_t resume_pc;
 
@@ -966,8 +982,7 @@ static int cortex_a8_internal_restore(struct target *target, int current,
                target_free_all_working_areas(target);
 
 #if 0
-       if (debug_execution)
-       {
+       if (debug_execution) {
                /* Disable interrupts */
                /* We disable interrupts in the PRIMASK register instead of
                 * masking with C_MASKINTS,
@@ -980,14 +995,15 @@ static int cortex_a8_internal_restore(struct target *target, int current,
 
                /* Make sure we are in Thumb mode */
                buf_set_u32(armv7m->core_cache->reg_list[ARMV7M_xPSR].value, 0, 32,
-                       buf_get_u32(armv7m->core_cache->reg_list[ARMV7M_xPSR].value, 0, 32) | (1 << 24));
+                       buf_get_u32(armv7m->core_cache->reg_list[ARMV7M_xPSR].value, 0,
+                       32) | (1 << 24));
                armv7m->core_cache->reg_list[ARMV7M_xPSR].dirty = 1;
                armv7m->core_cache->reg_list[ARMV7M_xPSR].valid = 1;
        }
 #endif
 
        /* current = 1: continue on current pc, otherwise continue at <address> */
-       resume_pc = buf_get_u32(armv4_5->pc->value, 0, 32);
+       resume_pc = buf_get_u32(arm->pc->value, 0, 32);
        if (!current)
                resume_pc = *address;
        else
@@ -996,49 +1012,51 @@ static int cortex_a8_internal_restore(struct target *target, int current,
        /* Make sure that the Armv7 gdb thumb fixups does not
         * kill the return address
         */
-       switch (armv4_5->core_state)
-       {
-       case ARM_STATE_ARM:
-               resume_pc &= 0xFFFFFFFC;
-               break;
-       case ARM_STATE_THUMB:
-       case ARM_STATE_THUMB_EE:
-               /* When the return address is loaded into PC
-                * bit 0 must be 1 to stay in Thumb state
-                */
-               resume_pc |= 0x1;
-               break;
-       case ARM_STATE_JAZELLE:
-               LOG_ERROR("How do I resume into Jazelle state??");
-               return ERROR_FAIL;
+       switch (arm->core_state) {
+               case ARM_STATE_ARM:
+                       resume_pc &= 0xFFFFFFFC;
+                       break;
+               case ARM_STATE_THUMB:
+               case ARM_STATE_THUMB_EE:
+                       /* When the return address is loaded into PC
+                        * bit 0 must be 1 to stay in Thumb state
+                        */
+                       resume_pc |= 0x1;
+                       break;
+               case ARM_STATE_JAZELLE:
+                       LOG_ERROR("How do I resume into Jazelle state??");
+                       return ERROR_FAIL;
+               case ARM_STATE_AARCH64:
+                       LOG_ERROR("Shoudn't be in AARCH64 state");
+                       return ERROR_FAIL;
        }
        LOG_DEBUG("resume pc = 0x%08" PRIx32, resume_pc);
-       buf_set_u32(armv4_5->pc->value, 0, 32, resume_pc);
-       armv4_5->pc->dirty = 1;
-       armv4_5->pc->valid = 1;
+       buf_set_u32(arm->pc->value, 0, 32, resume_pc);
+       arm->pc->dirty = 1;
+       arm->pc->valid = 1;
+
        /* restore dpm_mode at system halt */
-    dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
-    /* called it now before restoring context because it uses cpu
+       dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
+       /* called it now before restoring context because it uses cpu
         * register r0 for restoring cp15 control register */
-       retval = cortex_a8_restore_cp15_control_reg(target);
+       retval = cortex_a_restore_cp15_control_reg(target);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_restore_context(target, handle_breakpoints);
+       retval = cortex_a_restore_context(target, handle_breakpoints);
        if (retval != ERROR_OK)
                return retval;
-    target->debug_reason = DBG_REASON_NOTHALTED;
+       target->debug_reason = DBG_REASON_NOTHALTED;
        target->state = TARGET_RUNNING;
 
        /* registers are now invalid */
-       register_cache_invalidate(armv4_5->core_cache);
+       register_cache_invalidate(arm->core_cache);
 
 #if 0
        /* the front-end may request us not to handle breakpoints */
-       if (handle_breakpoints)
-       {
+       if (handle_breakpoints) {
                /* Single step past breakpoint at current address */
-               if ((breakpoint = breakpoint_find(target, resume_pc)))
-               {
+               breakpoint = breakpoint_find(target, resume_pc);
+               if (breakpoint) {
                        LOG_DEBUG("unset breakpoint at 0x%8.8x", breakpoint->address);
                        cortex_m3_unset_breakpoint(target, breakpoint);
                        cortex_m3_single_step_core(target);
@@ -1050,22 +1068,21 @@ static int cortex_a8_internal_restore(struct target *target, int current,
        return retval;
 }
 
-static int cortex_a8_internal_restart(struct target *target)
+static int cortex_a_internal_restart(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
-       struct adiv5_dap *swjdp = armv4_5->dap;
+       struct arm *arm = &armv7a->arm;
        int retval;
        uint32_t dscr;
-/*
-        * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * exception flags: see ARMv7 ARM, C5.9.
+       /*
+        * Restart core and wait for it to be started.  Clear ITRen and sticky
+        * exception flags: see ARMv7 ARM, C5.9.
         *
         * REVISIT: for single stepping, we probably want to
         * disable IRQs by default, with optional override...
         */
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
@@ -1073,28 +1090,26 @@ static int cortex_a8_internal_restart(struct target *target)
        if ((dscr & DSCR_INSTR_COMP) == 0)
                LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
-               armv7a->debug_base + CPUDBG_DSCR, dscr & ~DSCR_ITR_EN);
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, dscr & ~DSCR_ITR_EN);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DRCR, DRCR_RESTART |
                        DRCR_CLEAR_EXCEPTIONS);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       for (;;)
-       {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       int64_t then = timeval_ms();
+       for (;; ) {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
                if ((dscr & DSCR_CORE_RESTARTED) != 0)
                        break;
-               if (timeval_ms() > then + 1000)
-               {
+               if (timeval_ms() > then + 1000) {
                        LOG_ERROR("Timeout waiting for resume");
                        return ERROR_FAIL;
                }
@@ -1104,27 +1119,26 @@ static int cortex_a8_internal_restart(struct target *target)
        target->state = TARGET_RUNNING;
 
        /* registers are now invalid */
-       register_cache_invalidate(armv4_5->core_cache);
+       register_cache_invalidate(arm->core_cache);
 
        return ERROR_OK;
 }
 
-static int cortex_a8_restore_smp(struct target *target,int handle_breakpoints)
+static int cortex_a_restore_smp(struct target *target, int handle_breakpoints)
 {
        int retval = 0;
        struct target_list *head;
        struct target *curr;
-    uint32_t address;
+       target_addr_t address;
        head = target->head;
-       while(head != (struct target_list*)NULL)
-       {
+       while (head != (struct target_list *)NULL) {
                curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING))
-               {
-               /*  resume current address , not in step mode */
-               retval += cortex_a8_internal_restore(curr, 1, &address,
-                               handle_breakpoints, 0);
-                 retval += cortex_a8_internal_restart(curr);
+               if ((curr != target) && (curr->state != TARGET_RUNNING)
+                       && target_was_examined(curr)) {
+                       /*  resume current address , not in step mode */
+                       retval += cortex_a_internal_restore(curr, 1, &address,
+                                       handle_breakpoints, 0);
+                       retval += cortex_a_internal_restart(curr);
                }
                head = head->next;
 
@@ -1132,13 +1146,12 @@ static int cortex_a8_restore_smp(struct target *target,int handle_breakpoints)
        return retval;
 }
 
-static int cortex_a8_resume(struct target *target, int current,
-               uint32_t address, int handle_breakpoints, int debug_execution)
+static int cortex_a_resume(struct target *target, int current,
+       target_addr_t address, int handle_breakpoints, int debug_execution)
 {
        int retval = 0;
-       /*   dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1]!=-1))
-       {
+       /* dummy resume for smp toggle in order to reduce gdb impact  */
+       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
                /*   simulate a start and halt of target */
                target->gdb_service->target = NULL;
                target->gdb_service->core[0] = target->gdb_service->core[1];
@@ -1146,71 +1159,67 @@ static int cortex_a8_resume(struct target *target, int current,
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
                return 0;
        }
-       cortex_a8_internal_restore(target, current, &address, handle_breakpoints, debug_execution);
-       if (target->smp)
-       {   target->gdb_service->core[0] = -1;
-               retval = cortex_a8_restore_smp(target, handle_breakpoints);
+       cortex_a_internal_restore(target, current, &address, handle_breakpoints, debug_execution);
+       if (target->smp) {
+               target->gdb_service->core[0] = -1;
+               retval = cortex_a_restore_smp(target, handle_breakpoints);
                if (retval != ERROR_OK)
                        return retval;
        }
-       cortex_a8_internal_restart(target);
+       cortex_a_internal_restart(target);
 
-       if (!debug_execution)
-       {
+       if (!debug_execution) {
                target->state = TARGET_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               LOG_DEBUG("target resumed at 0x%" PRIx32, address);
-       }
-       else
-       {
+               LOG_DEBUG("target resumed at " TARGET_ADDR_FMT, address);
+       } else {
                target->state = TARGET_DEBUG_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_DEBUG_RESUMED);
-               LOG_DEBUG("target debug resumed at 0x%" PRIx32, address);
+               LOG_DEBUG("target debug resumed at " TARGET_ADDR_FMT, address);
        }
 
        return ERROR_OK;
 }
 
-static int cortex_a8_debug_entry(struct target *target)
+static int cortex_a_debug_entry(struct target *target)
 {
        int i;
-       uint32_t regfile[16], cpsr, dscr;
+       uint32_t regfile[16], cpsr, spsr, dscr;
        int retval = ERROR_OK;
        struct working_area *regfile_working_area = NULL;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
+       struct arm *arm = &armv7a->arm;
        struct reg *reg;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, cortex_a8->cpudbg_dscr);
+       LOG_DEBUG("dscr = 0x%08" PRIx32, cortex_a->cpudbg_dscr);
 
        /* REVISIT surely we should not re-read DSCR !! */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
+       /* REVISIT see A TRM 12.11.4 steps 2..3 -- make sure that any
         * imprecise data aborts get discarded by issuing a Data
         * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
         */
 
        /* Enable the ITR execution once we are in debug mode */
        dscr |= DSCR_ITR_EN;
-       retval = mem_ap_sel_write_atomic_u32(swjdp, swjdp_debugap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       arm_dpm_report_dscr(&armv7a->dpm, cortex_a8->cpudbg_dscr);
+       arm_dpm_report_dscr(&armv7a->dpm, cortex_a->cpudbg_dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
                uint32_t wfar;
 
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_WFAR,
                                &wfar);
                if (retval != ERROR_OK)
@@ -1221,39 +1230,34 @@ static int cortex_a8_debug_entry(struct target *target)
        /* REVISIT fast_reg_read is never set ... */
 
        /* Examine target state and mode */
-       if (cortex_a8->fast_reg_read)
+       if (cortex_a->fast_reg_read)
                target_alloc_working_area(target, 64, &regfile_working_area);
 
+
        /* First load register acessible through core debug port*/
        if (!regfile_working_area)
-       {
                retval = arm_dpm_read_current_registers(&armv7a->dpm);
-       }
-       else
-       {
-               retval = cortex_a8_read_regs_through_mem(target,
+       else {
+               retval = cortex_a_read_regs_through_mem(target,
                                regfile_working_area->address, regfile);
 
                target_free_working_area(target, regfile_working_area);
                if (retval != ERROR_OK)
-               {
                        return retval;
-               }
 
                /* read Current PSR */
-               retval = cortex_a8_dap_read_coreregister_u32(target, &cpsr, 16);
+               retval = cortex_a_dap_read_coreregister_u32(target, &cpsr, 16);
                /*  store current cpsr */
                if (retval != ERROR_OK)
                        return retval;
 
                LOG_DEBUG("cpsr: %8.8" PRIx32, cpsr);
 
-               arm_set_cpsr(armv4_5, cpsr);
+               arm_set_cpsr(arm, cpsr);
 
                /* update cache */
-               for (i = 0; i <= ARM_PC; i++)
-               {
-                       reg = arm_reg_current(armv4_5, i);
+               for (i = 0; i <= ARM_PC; i++) {
+                       reg = arm_reg_current(arm, i);
 
                        buf_set_u32(reg->value, 0, 32, regfile[i]);
                        reg->valid = 1;
@@ -1261,39 +1265,48 @@ static int cortex_a8_debug_entry(struct target *target)
                }
 
                /* Fixup PC Resume Address */
-               if (cpsr & (1 << 5))
-               {
-                       // T bit set for Thumb or ThumbEE state
+               if (cpsr & (1 << 5)) {
+                       /* T bit set for Thumb or ThumbEE state */
                        regfile[ARM_PC] -= 4;
-               }
-               else
-               {
-                       // ARM state
+               } else {
+                       /* ARM state */
                        regfile[ARM_PC] -= 8;
                }
 
-               reg = armv4_5->pc;
+               reg = arm->pc;
                buf_set_u32(reg->value, 0, 32, regfile[ARM_PC]);
                reg->dirty = reg->valid;
        }
 
+       if (arm->spsr) {
+               /* read Saved PSR */
+               retval = cortex_a_dap_read_coreregister_u32(target, &spsr, 17);
+               /*  store current spsr */
+               if (retval != ERROR_OK)
+                       return retval;
+
+               reg = arm->spsr;
+               buf_set_u32(reg->value, 0, 32, spsr);
+               reg->valid = 1;
+               reg->dirty = 0;
+       }
+
 #if 0
 /* TODO, Move this */
        uint32_t cp15_control_register, cp15_cacr, cp15_nacr;
-       cortex_a8_read_cp(target, &cp15_control_register, 15, 0, 1, 0, 0);
+       cortex_a_read_cp(target, &cp15_control_register, 15, 0, 1, 0, 0);
        LOG_DEBUG("cp15_control_register = 0x%08x", cp15_control_register);
 
-       cortex_a8_read_cp(target, &cp15_cacr, 15, 0, 1, 0, 2);
+       cortex_a_read_cp(target, &cp15_cacr, 15, 0, 1, 0, 2);
        LOG_DEBUG("cp15 Coprocessor Access Control Register = 0x%08x", cp15_cacr);
 
-       cortex_a8_read_cp(target, &cp15_nacr, 15, 0, 1, 1, 2);
+       cortex_a_read_cp(target, &cp15_nacr, 15, 0, 1, 1, 2);
        LOG_DEBUG("cp15 Nonsecure Access Control Register = 0x%08x", cp15_nacr);
 #endif
 
        /* Are we in an exception handler */
-//     armv4_5->exception_number = 0;
-       if (armv7a->post_debug_entry)
-       {
+/*     armv4_5->exception_number = 0; */
+       if (armv7a->post_debug_entry) {
                retval = armv7a->post_debug_entry(target);
                if (retval != ERROR_OK)
                        return retval;
@@ -1302,111 +1315,158 @@ static int cortex_a8_debug_entry(struct target *target)
        return retval;
 }
 
-static int cortex_a8_post_debug_entry(struct target *target)
+static int cortex_a_post_debug_entry(struct target *target)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
        int retval;
 
        /* MRC p15,0,<Rt>,c1,c0,0 ; Read CP15 System Control Register */
-       retval = armv7a->armv4_5_common.mrc(target, 15,
+       retval = armv7a->arm.mrc(target, 15,
                        0, 0,   /* op1, op2 */
                        1, 0,   /* CRn, CRm */
-                       &cortex_a8->cp15_control_reg);
+                       &cortex_a->cp15_control_reg);
        if (retval != ERROR_OK)
                return retval;
-       LOG_DEBUG("cp15_control_reg: %8.8" PRIx32, cortex_a8->cp15_control_reg);
-    cortex_a8->cp15_control_reg_curr = cortex_a8->cp15_control_reg;
+       LOG_DEBUG("cp15_control_reg: %8.8" PRIx32, cortex_a->cp15_control_reg);
+       cortex_a->cp15_control_reg_curr = cortex_a->cp15_control_reg;
 
-       if (armv7a->armv7a_mmu.armv7a_cache.ctype == -1)
-       {
+       if (!armv7a->is_armv7r)
+               armv7a_read_ttbcr(target);
+
+       if (armv7a->armv7a_mmu.armv7a_cache.info == -1)
                armv7a_identify_cache(target);
-       }
 
-       armv7a->armv7a_mmu.mmu_enabled =
-                       (cortex_a8->cp15_control_reg & 0x1U) ? 1 : 0;
+       if (armv7a->is_armv7r) {
+               armv7a->armv7a_mmu.mmu_enabled = 0;
+       } else {
+               armv7a->armv7a_mmu.mmu_enabled =
+                       (cortex_a->cp15_control_reg & 0x1U) ? 1 : 0;
+       }
        armv7a->armv7a_mmu.armv7a_cache.d_u_cache_enabled =
-                       (cortex_a8->cp15_control_reg & 0x4U) ? 1 : 0;
+               (cortex_a->cp15_control_reg & 0x4U) ? 1 : 0;
        armv7a->armv7a_mmu.armv7a_cache.i_cache_enabled =
-                       (cortex_a8->cp15_control_reg & 0x1000U) ? 1 : 0;
-    cortex_a8->curr_mode = armv7a->armv4_5_common.core_mode;
-    
+               (cortex_a->cp15_control_reg & 0x1000U) ? 1 : 0;
+       cortex_a->curr_mode = armv7a->arm.core_mode;
+
+       /* switch to SVC mode to read DACR */
+       dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
+       armv7a->arm.mrc(target, 15,
+                       0, 0, 3, 0,
+                       &cortex_a->cp15_dacr_reg);
+
+       LOG_DEBUG("cp15_dacr_reg: %8.8" PRIx32,
+                       cortex_a->cp15_dacr_reg);
+
+       dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
        return ERROR_OK;
 }
 
-static int cortex_a8_step(struct target *target, int current, uint32_t address,
-               int handle_breakpoints)
+int cortex_a_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
+{
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       uint32_t dscr;
+
+       /* Read DSCR */
+       int retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (ERROR_OK != retval)
+               return retval;
+
+       /* clear bitfield */
+       dscr &= ~bit_mask;
+       /* put new value */
+       dscr |= value & bit_mask;
+
+       /* write new DSCR */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, dscr);
+       return retval;
+}
+
+static int cortex_a_step(struct target *target, int current, target_addr_t address,
+       int handle_breakpoints)
 {
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
+       struct arm *arm = &armv7a->arm;
        struct breakpoint *breakpoint = NULL;
        struct breakpoint stepbreakpoint;
        struct reg *r;
        int retval;
 
-       if (target->state != TARGET_HALTED)
-       {
+       if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
        /* current = 1: continue on current pc, otherwise continue at <address> */
-       r = armv4_5->pc;
+       r = arm->pc;
        if (!current)
-       {
                buf_set_u32(r->value, 0, 32, address);
-       }
        else
-       {
                address = buf_get_u32(r->value, 0, 32);
-       }
 
        /* The front-end may request us not to handle breakpoints.
-        * But since Cortex-A8 uses breakpoint for single step,
+        * But since Cortex-A uses breakpoint for single step,
         * we MUST handle breakpoints.
         */
        handle_breakpoints = 1;
        if (handle_breakpoints) {
                breakpoint = breakpoint_find(target, address);
                if (breakpoint)
-                       cortex_a8_unset_breakpoint(target, breakpoint);
+                       cortex_a_unset_breakpoint(target, breakpoint);
        }
 
        /* Setup single step breakpoint */
        stepbreakpoint.address = address;
-       stepbreakpoint.length = (armv4_5->core_state == ARM_STATE_THUMB)
-                       ? 2 : 4;
+       stepbreakpoint.asid = 0;
+       stepbreakpoint.length = (arm->core_state == ARM_STATE_THUMB)
+               ? 2 : 4;
        stepbreakpoint.type = BKPT_HARD;
        stepbreakpoint.set = 0;
 
+       /* Disable interrupts during single step if requested */
+       if (cortex_a->isrmasking_mode == CORTEX_A_ISRMASK_ON) {
+               retval = cortex_a_set_dscr_bits(target, DSCR_INT_DIS, DSCR_INT_DIS);
+               if (ERROR_OK != retval)
+                       return retval;
+       }
+
        /* Break on IVA mismatch */
-       cortex_a8_set_breakpoint(target, &stepbreakpoint, 0x04);
+       cortex_a_set_breakpoint(target, &stepbreakpoint, 0x04);
 
        target->debug_reason = DBG_REASON_SINGLESTEP;
 
-       retval = cortex_a8_resume(target, 1, address, 0, 0);
+       retval = cortex_a_resume(target, 1, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED)
-       {
-               retval = cortex_a8_poll(target);
+       int64_t then = timeval_ms();
+       while (target->state != TARGET_HALTED) {
+               retval = cortex_a_poll(target);
                if (retval != ERROR_OK)
                        return retval;
-               if (timeval_ms() > then + 1000)
-               {
+               if (timeval_ms() > then + 1000) {
                        LOG_ERROR("timeout waiting for target halt");
                        return ERROR_FAIL;
                }
        }
 
-       cortex_a8_unset_breakpoint(target, &stepbreakpoint);
+       cortex_a_unset_breakpoint(target, &stepbreakpoint);
+
+       /* Re-enable interrupts if they were disabled */
+       if (cortex_a->isrmasking_mode == CORTEX_A_ISRMASK_ON) {
+               retval = cortex_a_set_dscr_bits(target, DSCR_INT_DIS, 0);
+               if (ERROR_OK != retval)
+                       return retval;
+       }
+
 
        target->debug_reason = DBG_REASON_BREAKPOINT;
 
        if (breakpoint)
-               cortex_a8_set_breakpoint(target, breakpoint, 0);
+               cortex_a_set_breakpoint(target, breakpoint, 0);
 
        if (target->state != TARGET_HALTED)
                LOG_DEBUG("target stepped");
@@ -1414,7 +1474,7 @@ static int cortex_a8_step(struct target *target, int current, uint32_t address,
        return ERROR_OK;
 }
 
-static int cortex_a8_restore_context(struct target *target, bool bpwp)
+static int cortex_a_restore_context(struct target *target, bool bpwp)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
 
@@ -1426,281 +1486,285 @@ static int cortex_a8_restore_context(struct target *target, bool bpwp)
        return arm_dpm_write_dirty_registers(&armv7a->dpm, bpwp);
 }
 
-
 /*
- * Cortex-A8 Breakpoint and watchpoint functions
+ * Cortex-A Breakpoint and watchpoint functions
  */
 
 /* Setup hardware Breakpoint Register Pair */
-static int cortex_a8_set_breakpoint(struct target *target,
-               struct breakpoint *breakpoint, uint8_t matchmode)
+static int cortex_a_set_breakpoint(struct target *target,
+       struct breakpoint *breakpoint, uint8_t matchmode)
 {
        int retval;
-       int brp_i=0;
+       int brp_i = 0;
        uint32_t control;
        uint8_t byte_addr_select = 0x0F;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp * brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
-       if (breakpoint->set)
-       {
+       if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
                return ERROR_OK;
        }
 
-       if (breakpoint->type == BKPT_HARD)
-       {
-               while (brp_list[brp_i].used && (brp_i < cortex_a8->brp_num))
-                       brp_i++ ;
-               if (brp_i >= cortex_a8->brp_num)
-               {
+       if (breakpoint->type == BKPT_HARD) {
+               while (brp_list[brp_i].used && (brp_i < cortex_a->brp_num))
+                       brp_i++;
+               if (brp_i >= cortex_a->brp_num) {
                        LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                        return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
                }
                breakpoint->set = brp_i + 1;
                if (breakpoint->length == 2)
-               {
                        byte_addr_select = (3 << (breakpoint->address & 0x02));
-               }
                control = ((matchmode & 0x7) << 20)
-                               | (byte_addr_select << 5)
-                               | (3 << 1) | 1;
+                       | (byte_addr_select << 5)
+                       | (3 << 1) | 1;
                brp_list[brp_i].used = 1;
                brp_list[brp_i].value = (breakpoint->address & 0xFFFFFFFC);
                brp_list[brp_i].control = control;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+               retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].value);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+               retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].control);
                if (retval != ERROR_OK)
                        return retval;
                LOG_DEBUG("brp %i control 0x%0" PRIx32 " value 0x%0" PRIx32, brp_i,
-                               brp_list[brp_i].control,
-                               brp_list[brp_i].value);
-       }
-       else if (breakpoint->type == BKPT_SOFT)
-       {
+                       brp_list[brp_i].control,
+                       brp_list[brp_i].value);
+       } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
+               /* length == 2: Thumb breakpoint */
                if (breakpoint->length == 2)
-               {
                        buf_set_u32(code, 0, 32, ARMV5_T_BKPT(0x11));
-               }
                else
-               {
+               /* length == 3: Thumb-2 breakpoint, actual encoding is
+                * a regular Thumb BKPT instruction but we replace a
+                * 32bit Thumb-2 instruction, so fix-up the breakpoint
+                * length
+                */
+               if (breakpoint->length == 3) {
+                       buf_set_u32(code, 0, 32, ARMV5_T_BKPT(0x11));
+                       breakpoint->length = 4;
+               } else
+                       /* length == 4, normal ARM breakpoint */
                        buf_set_u32(code, 0, 32, ARMV5_BKPT(0x11));
-               }
-               retval = target->type->read_memory(target,
+
+               retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
-               retval = target->type->write_memory(target,
+
+               /* make sure data cache is cleaned & invalidated down to PoC */
+               if (!armv7a->armv7a_mmu.armv7a_cache.auto_cache_enabled) {
+                       armv7a_cache_flush_virt(target, breakpoint->address,
+                                               breakpoint->length);
+               }
+
+               retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
-               breakpoint->set = 0x11; /* Any nice value but 0 */
+
+               /* update i-cache at breakpoint location */
+               armv7a_l1_d_cache_inval_virt(target, breakpoint->address,
+                                       breakpoint->length);
+               armv7a_l1_i_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
+
+               breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
        return ERROR_OK;
 }
 
-static int cortex_a8_set_context_breakpoint(struct target *target,
-               struct breakpoint *breakpoint, uint8_t matchmode)
+static int cortex_a_set_context_breakpoint(struct target *target,
+       struct breakpoint *breakpoint, uint8_t matchmode)
 {
        int retval = ERROR_FAIL;
-       int brp_i=0;
+       int brp_i = 0;
        uint32_t control;
        uint8_t byte_addr_select = 0x0F;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp * brp_list = cortex_a8->brp_list;
-       
-       if (breakpoint->set)
-       {
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
+
+       if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
-               return retval ;
+               return retval;
        }
        /*check available context BRPs*/
-       while ((brp_list[brp_i].used || (brp_list[brp_i].type!=BRP_CONTEXT)) && (brp_i < cortex_a8->brp_num))
-                       brp_i++ ;
-       
-       if (brp_i >= cortex_a8->brp_num)
-       {
+       while ((brp_list[brp_i].used ||
+               (brp_list[brp_i].type != BRP_CONTEXT)) && (brp_i < cortex_a->brp_num))
+               brp_i++;
+
+       if (brp_i >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
 
        breakpoint->set = brp_i + 1;
        control = ((matchmode & 0x7) << 20)
-                               | (byte_addr_select << 5)
-                               | (3 << 1) | 1;
+               | (byte_addr_select << 5)
+               | (3 << 1) | 1;
        brp_list[brp_i].used = 1;
        brp_list[brp_i].value = (breakpoint->asid);
        brp_list[brp_i].control = control;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
-                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn, 
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
+                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].value);
-       if(retval != ERROR_OK)
+       if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].control);
-       if(retval != ERROR_OK)
+       if (retval != ERROR_OK)
                return retval;
        LOG_DEBUG("brp %i control 0x%0" PRIx32 " value 0x%0" PRIx32, brp_i,
-       brp_list[brp_i].control,
-       brp_list[brp_i].value);
+               brp_list[brp_i].control,
+               brp_list[brp_i].value);
        return ERROR_OK;
-               
+
 }
 
-static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_set_hybrid_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
        int retval = ERROR_FAIL;
-       int brp_1=0; //holds the contextID pair
-       int brp_2=0; // holds the IVA pair
+       int brp_1 = 0;  /* holds the contextID pair */
+       int brp_2 = 0;  /* holds the IVA pair */
        uint32_t control_CTX, control_IVA;
        uint8_t CTX_byte_addr_select = 0x0F;
        uint8_t IVA_byte_addr_select = 0x0F;
        uint8_t CTX_machmode = 0x03;
        uint8_t IVA_machmode = 0x01;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp * brp_list = cortex_a8->brp_list;
-       
-       
-       
-       if (breakpoint->set)
-       {
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
+
+       if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
-               return retval ;
+               return retval;
        }
        /*check available context BRPs*/
-       while ((brp_list[brp_1].used || (brp_list[brp_1].type!=BRP_CONTEXT)) && (brp_1 < cortex_a8->brp_num))
-                       brp_1++ ;
-       
-       printf("brp(CTX) found num: %d \n",brp_1);
-       if (brp_1 >= cortex_a8->brp_num)
-       {
+       while ((brp_list[brp_1].used ||
+               (brp_list[brp_1].type != BRP_CONTEXT)) && (brp_1 < cortex_a->brp_num))
+               brp_1++;
+
+       printf("brp(CTX) found num: %d\n", brp_1);
+       if (brp_1 >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
 
-       while ((brp_list[brp_2].used || (brp_list[brp_2].type!=BRP_NORMAL)) && (brp_2 < cortex_a8->brp_num))
-                       brp_2++ ;
-       
-       printf("brp(IVA) found num: %d \n",brp_2);
-       if (brp_2 >= cortex_a8->brp_num)
-       {
+       while ((brp_list[brp_2].used ||
+               (brp_list[brp_2].type != BRP_NORMAL)) && (brp_2 < cortex_a->brp_num))
+               brp_2++;
+
+       printf("brp(IVA) found num: %d\n", brp_2);
+       if (brp_2 >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
 
        breakpoint->set = brp_1 + 1;
-       breakpoint->linked_BRP= brp_2;
+       breakpoint->linked_BRP = brp_2;
        control_CTX = ((CTX_machmode & 0x7) << 20)
-                               | (brp_2 << 16)
-                               | (0 << 14)
-                               | (CTX_byte_addr_select << 5)
-                               | (3 << 1) | 1;
-               brp_list[brp_1].used = 1;
-               brp_list[brp_1].value = (breakpoint->asid);
-               brp_list[brp_1].control = control_CTX;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
-                               + CPUDBG_BVR_BASE + 4 * brp_list[brp_1].BRPn, 
-                               brp_list[brp_1].value);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
-                               + CPUDBG_BCR_BASE + 4 * brp_list[brp_1].BRPn,
-                               brp_list[brp_1].control);
-               if( retval != ERROR_OK )
-                       return retval;
-
-               control_IVA = ((IVA_machmode & 0x7) << 20)
-                               | (brp_1 << 16)
-                               | (IVA_byte_addr_select << 5)
-                               | (3 << 1) | 1;
-               brp_list[brp_2].used = 1;
-               brp_list[brp_2].value = (breakpoint->address & 0xFFFFFFFC);
-               brp_list[brp_2].control = control_IVA;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
-                               + CPUDBG_BVR_BASE + 4 * brp_list[brp_2].BRPn, 
-                               brp_list[brp_2].value);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
-                               + CPUDBG_BCR_BASE + 4 * brp_list[brp_2].BRPn,
-                               brp_list[brp_2].control);
-               if (retval != ERROR_OK )
-                       return retval;
+               | (brp_2 << 16)
+               | (0 << 14)
+               | (CTX_byte_addr_select << 5)
+               | (3 << 1) | 1;
+       brp_list[brp_1].used = 1;
+       brp_list[brp_1].value = (breakpoint->asid);
+       brp_list[brp_1].control = control_CTX;
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
+                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_1].BRPn,
+                       brp_list[brp_1].value);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
+                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_1].BRPn,
+                       brp_list[brp_1].control);
+       if (retval != ERROR_OK)
+               return retval;
 
-       return ERROR_OK;
-}
+       control_IVA = ((IVA_machmode & 0x7) << 20)
+               | (brp_1 << 16)
+               | (IVA_byte_addr_select << 5)
+               | (3 << 1) | 1;
+       brp_list[brp_2].used = 1;
+       brp_list[brp_2].value = (breakpoint->address & 0xFFFFFFFC);
+       brp_list[brp_2].control = control_IVA;
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
+                       + CPUDBG_BVR_BASE + 4 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].value);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
+                       + CPUDBG_BCR_BASE + 4 * brp_list[brp_2].BRPn,
+                       brp_list[brp_2].control);
+       if (retval != ERROR_OK)
+               return retval;
 
+       return ERROR_OK;
+}
 
-static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_unset_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
        int retval;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp * brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
-       if (!breakpoint->set)
-       {
+       if (!breakpoint->set) {
                LOG_WARNING("breakpoint not set");
                return ERROR_OK;
        }
 
-       if (breakpoint->type == BKPT_HARD)
-       {
-               if ((breakpoint->address != 0) && (breakpoint->asid != 0))
-               {
+       if (breakpoint->type == BKPT_HARD) {
+               if ((breakpoint->address != 0) && (breakpoint->asid != 0)) {
                        int brp_i = breakpoint->set - 1;
                        int brp_j = breakpoint->linked_BRP;
-                       if ((brp_i < 0) || (brp_i >= cortex_a8->brp_num))
-                       {
+                       if ((brp_i < 0) || (brp_i >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
                        LOG_DEBUG("rbp %i control 0x%0" PRIx32 " value 0x%0" PRIx32, brp_i,
-                                       brp_list[brp_i].control, brp_list[brp_i].value);
+                               brp_list[brp_i].control, brp_list[brp_i].value);
                        brp_list[brp_i].used = 0;
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
-                               return retval;                                          
-                       if ((brp_j < 0) || (brp_j >= cortex_a8->brp_num))
-                       {
+                               return retval;
+                       if ((brp_j < 0) || (brp_j >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
                        LOG_DEBUG("rbp %i control 0x%0" PRIx32 " value 0x%0" PRIx32, brp_j,
-                                       brp_list[brp_j].control, brp_list[brp_j].value);
+                               brp_list[brp_j].control, brp_list[brp_j].value);
                        brp_list[brp_j].used = 0;
                        brp_list[brp_j].value = 0;
                        brp_list[brp_j].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].value);
                        if (retval != ERROR_OK)
@@ -1708,143 +1772,141 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        breakpoint->linked_BRP = 0;
                        breakpoint->set = 0;
                        return ERROR_OK;
-                       
-               }
-               else
-               {
+
+               } else {
                        int brp_i = breakpoint->set - 1;
-                       if ((brp_i < 0) || (brp_i >= cortex_a8->brp_num))
-                       {
+                       if ((brp_i < 0) || (brp_i >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
                        LOG_DEBUG("rbp %i control 0x%0" PRIx32 " value 0x%0" PRIx32, brp_i,
-                                       brp_list[brp_i].control, brp_list[brp_i].value);
+                               brp_list[brp_i].control, brp_list[brp_i].value);
                        brp_list[brp_i].used = 0;
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
                        breakpoint->set = 0;
                        return ERROR_OK;
-               }                                       
-       }
-       else
-       {
+               }
+       } else {
+
+               /* make sure data cache is cleaned & invalidated down to PoC */
+               if (!armv7a->armv7a_mmu.armv7a_cache.auto_cache_enabled) {
+                       armv7a_cache_flush_virt(target, breakpoint->address,
+                                               breakpoint->length);
+               }
+
                /* restore original instruction (kept in target endianness) */
-               if (breakpoint->length == 4)
-               {
-                       retval = target->type->write_memory(target,
+               if (breakpoint->length == 4) {
+                       retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFE,
                                        4, 1, breakpoint->orig_instr);
                        if (retval != ERROR_OK)
                                return retval;
-               }
-               else
-               {
-                       retval = target->type->write_memory(target,
+               } else {
+                       retval = target_write_memory(target,
                                        breakpoint->address & 0xFFFFFFFE,
                                        2, 1, breakpoint->orig_instr);
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               /* update i-cache at breakpoint location */
+               armv7a_l1_d_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
+               armv7a_l1_i_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
        }
        breakpoint->set = 0;
 
        return ERROR_OK;
 }
 
-static int cortex_a8_add_breakpoint(struct target *target,
-               struct breakpoint *breakpoint)
+static int cortex_a_add_breakpoint(struct target *target,
+       struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1))
-       {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_breakpoint(target, breakpoint, 0x00); /* Exact match */
+       return cortex_a_set_breakpoint(target, breakpoint, 0x00);       /* Exact match */
 }
 
-static int cortex_a8_add_context_breakpoint(struct target *target,
-               struct breakpoint *breakpoint)
+static int cortex_a_add_context_breakpoint(struct target *target,
+       struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1))
-       {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_context_breakpoint(target, breakpoint, 0x02); /* asid match */
+       return cortex_a_set_context_breakpoint(target, breakpoint, 0x02);       /* asid match */
 }
 
-static int cortex_a8_add_hybrid_breakpoint(struct target *target,
-               struct breakpoint *breakpoint)
+static int cortex_a_add_hybrid_breakpoint(struct target *target,
+       struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1))
-       {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_hybrid_breakpoint(target, breakpoint); /* ??? */
+       return cortex_a_set_hybrid_breakpoint(target, breakpoint);      /* ??? */
 }
 
 
-static int cortex_a8_remove_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_remove_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
 #if 0
 /* It is perfectly possible to remove breakpoints while the target is running */
-       if (target->state != TARGET_HALTED)
-       {
+       if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 #endif
 
-       if (breakpoint->set)
-       {
-               cortex_a8_unset_breakpoint(target, breakpoint);
+       if (breakpoint->set) {
+               cortex_a_unset_breakpoint(target, breakpoint);
                if (breakpoint->type == BKPT_HARD)
-                       cortex_a8->brp_num_available++ ;
+                       cortex_a->brp_num_available++;
        }
 
 
        return ERROR_OK;
 }
 
-
-
 /*
- * Cortex-A8 Reset functions
+ * Cortex-A Reset functions
  */
 
-static int cortex_a8_assert_reset(struct target *target)
+static int cortex_a_assert_reset(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
 
@@ -1852,28 +1914,39 @@ static int cortex_a8_assert_reset(struct target *target)
 
        /* FIXME when halt is requested, make it work somehow... */
 
+       /* This function can be called in "target not examined" state */
+
        /* Issue some kind of warm reset. */
-       if (target_has_event_action(target, TARGET_EVENT_RESET_ASSERT)) {
+       if (target_has_event_action(target, TARGET_EVENT_RESET_ASSERT))
                target_handle_event(target, TARGET_EVENT_RESET_ASSERT);
-       else if (jtag_get_reset_config() & RESET_HAS_SRST) {
+       else if (jtag_get_reset_config() & RESET_HAS_SRST) {
                /* REVISIT handle "pulls" cases, if there's
                 * hardware that needs them to work.
                 */
-               jtag_add_reset(0, 1);
+
+               /*
+                * FIXME: fix reset when transport is SWD. This is a temporary
+                * work-around for release v0.10 that is not intended to stay!
+                */
+               if (transport_is_swd() ||
+                               (target->reset_halt && (jtag_get_reset_config() & RESET_SRST_NO_GATING)))
+                       jtag_add_reset(0, 1);
+
        } else {
                LOG_ERROR("%s: how to reset?", target_name(target));
                return ERROR_FAIL;
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv7a->armv4_5_common.core_cache);
+       if (target_was_examined(target))
+               register_cache_invalidate(armv7a->arm.core_cache);
 
        target->state = TARGET_RESET;
 
        return ERROR_OK;
 }
 
-static int cortex_a8_deassert_reset(struct target *target)
+static int cortex_a_deassert_reset(struct target *target)
 {
        int retval;
 
@@ -1882,409 +1955,989 @@ static int cortex_a8_deassert_reset(struct target *target)
        /* be certain SRST is off */
        jtag_add_reset(0, 0);
 
-       retval = cortex_a8_poll(target);
-       if (retval != ERROR_OK)
-               return retval;
+       if (target_was_examined(target)) {
+               retval = cortex_a_poll(target);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
        if (target->reset_halt) {
                if (target->state != TARGET_HALTED) {
                        LOG_WARNING("%s: ran after reset and before halt ...",
-                                       target_name(target));
-                       if ((retval = target_halt(target)) != ERROR_OK)
-                               return retval;
+                               target_name(target));
+                       if (target_was_examined(target)) {
+                               retval = target_halt(target);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       } else
+                               target->state = TARGET_UNKNOWN;
                }
        }
 
        return ERROR_OK;
 }
 
+static int cortex_a_set_dcc_mode(struct target *target, uint32_t mode, uint32_t *dscr)
+{
+       /* Changes the mode of the DCC between non-blocking, stall, and fast mode.
+        * New desired mode must be in mode. Current value of DSCR must be in
+        * *dscr, which is updated with new value.
+        *
+        * This function elides actually sending the mode-change over the debug
+        * interface if the mode is already set as desired.
+        */
+       uint32_t new_dscr = (*dscr & ~DSCR_EXT_DCC_MASK) | mode;
+       if (new_dscr != *dscr) {
+               struct armv7a_common *armv7a = target_to_armv7a(target);
+               int retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, new_dscr);
+               if (retval == ERROR_OK)
+                       *dscr = new_dscr;
+               return retval;
+       } else {
+               return ERROR_OK;
+       }
+}
 
-static int cortex_a8_write_apb_ab_memory(struct target *target,
-                uint32_t address, uint32_t size,
-                uint32_t count, const uint8_t *buffer)
+static int cortex_a_wait_dscr_bits(struct target *target, uint32_t mask,
+       uint32_t value, uint32_t *dscr)
 {
+       /* Waits until the specified bit(s) of DSCR take on a specified value. */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       int64_t then = timeval_ms();
+       int retval;
 
-       /* write memory through APB-AP */
+       while ((*dscr & mask) != value) {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("timeout waiting for DSCR bit change");
+                       return ERROR_FAIL;
+               }
+       }
+       return ERROR_OK;
+}
 
-       int retval = ERROR_INVALID_ARGUMENTS;
+static int cortex_a_read_copro(struct target *target, uint32_t opcode,
+       uint32_t *data, uint32_t *dscr)
+{
+       int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
-       int total_bytes = count * size;
-       int start_byte, nbytes_to_write, i;
-       struct reg *reg;
-       union _data {
-               uint8_t uc_a[4];
-               uint32_t ui;
-       } data;
 
-       if (target->state != TARGET_HALTED)
-       {
+       /* Move from coprocessor to R0. */
+       retval = cortex_a_exec_opcode(target, opcode, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Move from R0 to DTRTX. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0), dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Wait until DTRTX is full (according to ARMv7-A/-R architecture
+        * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+        * must also check TXfull_l). Most of the time this will be free
+        * because TXfull_l will be set immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                       DSCR_DTRTX_FULL_LATCHED, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Read the value transferred to DTRTX. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRTX, data);
+       if (retval != ERROR_OK)
+               return retval;
+
+       return ERROR_OK;
+}
+
+static int cortex_a_read_dfar_dfsr(struct target *target, uint32_t *dfar,
+       uint32_t *dfsr, uint32_t *dscr)
+{
+       int retval;
+
+       if (dfar) {
+               retval = cortex_a_read_copro(target, ARMV4_5_MRC(15, 0, 0, 6, 0, 0), dfar, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       if (dfsr) {
+               retval = cortex_a_read_copro(target, ARMV4_5_MRC(15, 0, 0, 5, 0, 0), dfsr, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       return ERROR_OK;
+}
+
+static int cortex_a_write_copro(struct target *target, uint32_t opcode,
+       uint32_t data, uint32_t *dscr)
+{
+       int retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+
+       /* Write the value into DTRRX. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, data);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Move from DTRRX to R0. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Move from R0 to coprocessor. */
+       retval = cortex_a_exec_opcode(target, opcode, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check RXfull_l). Most of the time this will be free because RXfull_l
+        * will be cleared immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       return ERROR_OK;
+}
+
+static int cortex_a_write_dfar_dfsr(struct target *target, uint32_t dfar,
+       uint32_t dfsr, uint32_t *dscr)
+{
+       int retval;
+
+       retval = cortex_a_write_copro(target, ARMV4_5_MCR(15, 0, 0, 6, 0, 0), dfar, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       retval = cortex_a_write_copro(target, ARMV4_5_MCR(15, 0, 0, 5, 0, 0), dfsr, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       return ERROR_OK;
+}
+
+static int cortex_a_dfsr_to_error_code(uint32_t dfsr)
+{
+       uint32_t status, upper4;
+
+       if (dfsr & (1 << 9)) {
+               /* LPAE format. */
+               status = dfsr & 0x3f;
+               upper4 = status >> 2;
+               if (upper4 == 1 || upper4 == 2 || upper4 == 3 || upper4 == 15)
+                       return ERROR_TARGET_TRANSLATION_FAULT;
+               else if (status == 33)
+                       return ERROR_TARGET_UNALIGNED_ACCESS;
+               else
+                       return ERROR_TARGET_DATA_ABORT;
+       } else {
+               /* Normal format. */
+               status = ((dfsr >> 6) & 0x10) | (dfsr & 0xf);
+               if (status == 1)
+                       return ERROR_TARGET_UNALIGNED_ACCESS;
+               else if (status == 5 || status == 7 || status == 3 || status == 6 ||
+                               status == 9 || status == 11 || status == 13 || status == 15)
+                       return ERROR_TARGET_TRANSLATION_FAULT;
+               else
+                       return ERROR_TARGET_DATA_ABORT;
+       }
+}
+
+static int cortex_a_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       /* Writes count objects of size size from *buffer. Old value of DSCR must
+        * be in *dscr; updated to new value. This is slow because it works for
+        * non-word-sized objects and (maybe) unaligned accesses. If size == 4 and
+        * the address is aligned, cortex_a_write_cpu_memory_fast should be
+        * preferred.
+        * Preconditions:
+        * - Address is in R0.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       int retval;
+
+       /* Mark register R1 as dirty, to use for transferring data. */
+       arm_reg_current(arm, 1)->dirty = true;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Go through the objects. */
+       while (count) {
+               /* Write the value to store into DTRRX. */
+               uint32_t data, opcode;
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Transfer the value from DTRRX to R1. */
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Write the value transferred to R1 into memory. */
+               if (size == 1)
+                       opcode = ARMV4_5_STRB_IP(1, 0);
+               else if (size == 2)
+                       opcode = ARMV4_5_STRH_IP(1, 0);
+               else
+                       opcode = ARMV4_5_STRW_IP(1, 0);
+               retval = cortex_a_exec_opcode(target, opcode, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Check for faults and return early. */
+               if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+                       return ERROR_OK; /* A data fault is not considered a system failure. */
+
+               /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture
+                * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+                * must also check RXfull_l). Most of the time this will be free
+                * because RXfull_l will be cleared immediately and cached in dscr. */
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance. */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int cortex_a_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       /* Writes count objects of size 4 from *buffer. Old value of DSCR must be
+        * in *dscr; updated to new value. This is fast but only works for
+        * word-sized objects at aligned addresses.
+        * Preconditions:
+        * - Address is in R0 and must be a multiple of 4.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       int retval;
+
+       /* Switch to fast mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_FAST_MODE, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Latch STC instruction. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_ITR, ARMV4_5_STC(0, 1, 0, 1, 14, 5, 0, 4));
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Transfer all the data and issue all the instructions. */
+       return mem_ap_write_buf_noincr(armv7a->debug_ap, buffer,
+                       4, count, armv7a->debug_base + CPUDBG_DTRRX);
+}
+
+static int cortex_a_write_cpu_memory(struct target *target,
+       uint32_t address, uint32_t size,
+       uint32_t count, const uint8_t *buffer)
+{
+       /* Write memory through the CPU. */
+       int retval, final_retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       uint32_t dscr, orig_dfar, orig_dfsr, fault_dscr, fault_dfar, fault_dfsr;
+
+       LOG_DEBUG("Writing CPU memory address 0x%" PRIx32 " size %"  PRIu32 " count %"  PRIu32,
+                         address, size, count);
+       if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       reg = arm_reg_current(armv4_5, 0);
-       reg->dirty = 1;
-       reg = arm_reg_current(armv4_5, 1);
-       reg->dirty = 1;
+       if (!count)
+               return ERROR_OK;
 
-       retval = cortex_a8_dap_write_coreregister_u32(target, address & 0xFFFFFFFC, 0);
+       /* Clear any abort. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Read DSCR. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       start_byte = address & 0x3;
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-       while (total_bytes > 0) {
+       /* Mark R0 as dirty. */
+       arm_reg_current(arm, 0)->dirty = true;
 
-               nbytes_to_write = 4 - start_byte;
-               if (total_bytes < nbytes_to_write)
-                       nbytes_to_write = total_bytes; 
-                       
-               if ( nbytes_to_write != 4 ) {
-               
-                       /* execute instruction LDR r1, [r0] */
-                       retval = cortex_a8_exec_opcode(target,  ARMV4_5_LDR(1, 0), NULL);
-                       if (retval != ERROR_OK)
-                               return retval;
+       /* Read DFAR and DFSR, as they will be modified in the event of a fault. */
+       retval = cortex_a_read_dfar_dfsr(target, &orig_dfar, &orig_dfsr, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-                       retval = cortex_a8_dap_read_coreregister_u32(target, &data.ui, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-       
-               for (i = 0; i < nbytes_to_write; ++i)
-                       data.uc_a[i + start_byte] = *buffer++;
+       /* Get the memory address into R0. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, address);
+       if (retval != ERROR_OK)
+               goto out;
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-               retval = cortex_a8_dap_write_coreregister_u32(target, data.ui, 1);
+       if (size == 4 && (address % 4) == 0) {
+               /* We are doing a word-aligned transfer, so use fast mode. */
+               retval = cortex_a_write_cpu_memory_fast(target, count, buffer, &dscr);
+       } else {
+               /* Use slow path. */
+               retval = cortex_a_write_cpu_memory_slow(target, size, count, buffer, &dscr);
+       }
+
+out:
+       final_retval = retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, &dscr, true);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check RXfull_l). Most of the time this will be free because RXfull_l
+        * will be cleared immediately and cached in dscr. However, don't do this
+        * if there is fault, because then the instruction might not have completed
+        * successfully. */
+       if (!(dscr & DSCR_STICKY_ABORT_PRECISE)) {
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, &dscr);
                if (retval != ERROR_OK)
                        return retval;
+       }
 
-               /* execute instruction STRW r1, [r0], 1 (0xe4801004) */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_STRW_IP(1, 0) , NULL);
+       /* If there were any sticky abort flags, clear them. */
+       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
+               fault_dscr = dscr;
+               mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+               dscr &= ~(DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE);
+       } else {
+               fault_dscr = 0;
+       }
+
+       /* Handle synchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_PRECISE) {
+               if (final_retval == ERROR_OK) {
+                       /* Final return value will reflect cause of fault. */
+                       retval = cortex_a_read_dfar_dfsr(target, &fault_dfar, &fault_dfsr, &dscr);
+                       if (retval == ERROR_OK) {
+                               LOG_ERROR("data abort at 0x%08" PRIx32 ", dfsr = 0x%08" PRIx32, fault_dfar, fault_dfsr);
+                               final_retval = cortex_a_dfsr_to_error_code(fault_dfsr);
+                       } else
+                               final_retval = retval;
+               }
+               /* Fault destroyed DFAR/DFSR; restore them. */
+               retval = cortex_a_write_dfar_dfsr(target, orig_dfar, orig_dfsr, &dscr);
                if (retval != ERROR_OK)
-                               return retval;
+                       LOG_ERROR("error restoring dfar/dfsr - dscr = 0x%08" PRIx32, dscr);
+       }
 
-               total_bytes -= nbytes_to_write;
-               start_byte = 0;
+       /* Handle asynchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_IMPRECISE) {
+               if (final_retval == ERROR_OK)
+                       /* No other error has been recorded so far, so keep this one. */
+                       final_retval = ERROR_TARGET_DATA_ABORT;
        }
 
-       return retval;
+       /* If the DCC is nonempty, clear it. */
+       if (dscr & DSCR_DTRTX_FULL_LATCHED) {
+               uint32_t dummy;
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &dummy);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+       if (dscr & DSCR_DTRRX_FULL_LATCHED) {
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), &dscr);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+
+       /* Done. */
+       return final_retval;
 }
 
+static int cortex_a_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       /* Reads count objects of size size into *buffer. Old value of DSCR must be
+        * in *dscr; updated to new value. This is slow because it works for
+        * non-word-sized objects and (maybe) unaligned accesses. If size == 4 and
+        * the address is aligned, cortex_a_read_cpu_memory_fast should be
+        * preferred.
+        * Preconditions:
+        * - Address is in R0.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       int retval;
+
+       /* Mark register R1 as dirty, to use for transferring data. */
+       arm_reg_current(arm, 1)->dirty = true;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Go through the objects. */
+       while (count) {
+               /* Issue a load of the appropriate size to R1. */
+               uint32_t opcode, data;
+               if (size == 1)
+                       opcode = ARMV4_5_LDRB_IP(1, 0);
+               else if (size == 2)
+                       opcode = ARMV4_5_LDRH_IP(1, 0);
+               else
+                       opcode = ARMV4_5_LDRW_IP(1, 0);
+               retval = cortex_a_exec_opcode(target, opcode, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Issue a write of R1 to DTRTX. */
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MCR(14, 0, 1, 0, 5, 0), dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Check for faults and return early. */
+               if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+                       return ERROR_OK; /* A data fault is not considered a system failure. */
+
+               /* Wait until DTRTX is full (according to ARMv7-A/-R architecture
+                * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+                * must also check TXfull_l). Most of the time this will be free
+                * because TXfull_l will be set immediately and cached in dscr. */
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                               DSCR_DTRTX_FULL_LATCHED, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Read the value transferred to DTRTX into the buffer. */
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (size == 1)
+                       *buffer = (uint8_t) data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t) data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
+
+               /* Advance. */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
 
-static int cortex_a8_read_apb_ab_memory(struct target *target,
-                uint32_t address, uint32_t size,
-                uint32_t count, uint8_t *buffer)
+static int cortex_a_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
 {
+       /* Reads count objects of size 4 into *buffer. Old value of DSCR must be in
+        * *dscr; updated to new value. This is fast but only works for word-sized
+        * objects at aligned addresses.
+        * Preconditions:
+        * - Address is in R0 and must be a multiple of 4.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       uint32_t u32;
+       int retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Issue the LDC instruction via a write to ITR. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4), dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       count--;
+
+       if (count > 0) {
+               /* Switch to fast mode if not already in that mode. */
+               retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_FAST_MODE, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Latch LDC instruction. */
+               retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_ITR, ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4));
+               if (retval != ERROR_OK)
+                       return retval;
 
-       /* read memory through APB-AP */
+               /* Read the value transferred to DTRTX into the buffer. Due to fast
+                * mode rules, this blocks until the instruction finishes executing and
+                * then reissues the read instruction to read the next word from
+                * memory. The last read of DTRTX in this call reads the second-to-last
+                * word from memory and issues the read instruction for the last word.
+                */
+               retval = mem_ap_read_buf_noincr(armv7a->debug_ap, buffer,
+                               4, count, armv7a->debug_base + CPUDBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance. */
+               buffer += count * 4;
+       }
+
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, dscr, false);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Check for faults and return early. */
+       if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+               return ERROR_OK; /* A data fault is not considered a system failure. */
+
+       /* Wait until DTRTX is full (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check TXfull_l). Most of the time this will be free because TXfull_l
+        * will be set immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                       DSCR_DTRTX_FULL_LATCHED, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Read the value transferred to DTRTX into the buffer. This is the last
+        * word. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRTX, &u32);
+       if (retval != ERROR_OK)
+               return retval;
+       target_buffer_set_u32(target, buffer, u32);
+
+       return ERROR_OK;
+}
 
-       int retval = ERROR_INVALID_ARGUMENTS;
+static int cortex_a_read_cpu_memory(struct target *target,
+       uint32_t address, uint32_t size,
+       uint32_t count, uint8_t *buffer)
+{
+       /* Read memory through the CPU. */
+       int retval, final_retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *armv4_5 = &armv7a->armv4_5_common;
-       int total_bytes = count * size;
-       int start_byte, nbytes_to_read, i;
-       struct reg *reg;
-       union _data {
-               uint8_t uc_a[4];
-               uint32_t ui;
-       } data;
+       struct arm *arm = &armv7a->arm;
+       uint32_t dscr, orig_dfar, orig_dfsr, fault_dscr, fault_dfar, fault_dfsr;
 
-       if (target->state != TARGET_HALTED)
-       {
+       LOG_DEBUG("Reading CPU memory address 0x%" PRIx32 " size %"  PRIu32 " count %"  PRIu32,
+                         address, size, count);
+       if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       reg = arm_reg_current(armv4_5, 0);
-       reg->dirty = 1;
-       reg = arm_reg_current(armv4_5, 1);
-       reg->dirty = 1;
+       if (!count)
+               return ERROR_OK;
+
+       /* Clear any abort. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Read DSCR */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
+
+       /* Mark R0 as dirty. */
+       arm_reg_current(arm, 0)->dirty = true;
+
+       /* Read DFAR and DFSR, as they will be modified in the event of a fault. */
+       retval = cortex_a_read_dfar_dfsr(target, &orig_dfar, &orig_dfsr, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
+
+       /* Get the memory address into R0. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, address);
+       if (retval != ERROR_OK)
+               goto out;
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
+       if (retval != ERROR_OK)
+               goto out;
+
+       if (size == 4 && (address % 4) == 0) {
+               /* We are doing a word-aligned transfer, so use fast mode. */
+               retval = cortex_a_read_cpu_memory_fast(target, count, buffer, &dscr);
+       } else {
+               /* Use slow path. */
+               retval = cortex_a_read_cpu_memory_slow(target, size, count, buffer, &dscr);
+       }
+
+out:
+       final_retval = retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, &dscr, true);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* If there were any sticky abort flags, clear them. */
+       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
+               fault_dscr = dscr;
+               mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+               dscr &= ~(DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE);
+       } else {
+               fault_dscr = 0;
+       }
+
+       /* Handle synchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_PRECISE) {
+               if (final_retval == ERROR_OK) {
+                       /* Final return value will reflect cause of fault. */
+                       retval = cortex_a_read_dfar_dfsr(target, &fault_dfar, &fault_dfsr, &dscr);
+                       if (retval == ERROR_OK) {
+                               LOG_ERROR("data abort at 0x%08" PRIx32 ", dfsr = 0x%08" PRIx32, fault_dfar, fault_dfsr);
+                               final_retval = cortex_a_dfsr_to_error_code(fault_dfsr);
+                       } else
+                               final_retval = retval;
+               }
+               /* Fault destroyed DFAR/DFSR; restore them. */
+               retval = cortex_a_write_dfar_dfsr(target, orig_dfar, orig_dfsr, &dscr);
+               if (retval != ERROR_OK)
+                       LOG_ERROR("error restoring dfar/dfsr - dscr = 0x%08" PRIx32, dscr);
+       }
+
+       /* Handle asynchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_IMPRECISE) {
+               if (final_retval == ERROR_OK)
+                       /* No other error has been recorded so far, so keep this one. */
+                       final_retval = ERROR_TARGET_DATA_ABORT;
+       }
+
+       /* If the DCC is nonempty, clear it. */
+       if (dscr & DSCR_DTRTX_FULL_LATCHED) {
+               uint32_t dummy;
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &dummy);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+       if (dscr & DSCR_DTRRX_FULL_LATCHED) {
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), &dscr);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+
+       /* Done. */
+       return final_retval;
+}
+
+
+/*
+ * Cortex-A Memory access
+ *
+ * This is same Cortex-M3 but we must also use the correct
+ * ap number for every access.
+ */
+
+static int cortex_a_read_phys_memory(struct target *target,
+       target_addr_t address, uint32_t size,
+       uint32_t count, uint8_t *buffer)
+{
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
+       uint8_t apsel = swjdp->apsel;
+       int retval;
+
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
+
+       LOG_DEBUG("Reading memory at real address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
+
+       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap->ap_num))
+               return mem_ap_read_buf(armv7a->memory_ap, buffer, size, count, address);
+
+       /* read memory through the CPU */
+       cortex_a_prep_memaccess(target, 1);
+       retval = cortex_a_read_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 1);
+
+       return retval;
+}
+
+static int cortex_a_read_memory(struct target *target, target_addr_t address,
+       uint32_t size, uint32_t count, uint8_t *buffer)
+{
+       int retval;
+
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Reading memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
+
+       cortex_a_prep_memaccess(target, 0);
+       retval = cortex_a_read_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 0);
+
+       return retval;
+}
 
-       retval = cortex_a8_dap_write_coreregister_u32(target, address & 0xFFFFFFFC, 0);
-       if (retval != ERROR_OK)
-               return retval;
+static int cortex_a_read_memory_ahb(struct target *target, target_addr_t address,
+       uint32_t size, uint32_t count, uint8_t *buffer)
+{
+       int mmu_enabled = 0;
+       target_addr_t virt, phys;
+       int retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
+       uint8_t apsel = swjdp->apsel;
 
-       start_byte = address & 0x3;
+       if (!armv7a->memory_ap_available || (apsel != armv7a->memory_ap->ap_num))
+               return target_read_memory(target, address, size, count, buffer);
 
-       while (total_bytes > 0) {
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Reading memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-               /* execute instruction LDRW r1, [r0], 4 (0xe4901004)  */
-               retval = cortex_a8_exec_opcode(target,  ARMV4_5_LDRW_IP(1, 0), NULL);
+       /* determine if MMU was enabled on target stop */
+       if (!armv7a->is_armv7r) {
+               retval = cortex_a_mmu(target, &mmu_enabled);
                if (retval != ERROR_OK)
                        return retval;
+       }
 
-               retval = cortex_a8_dap_read_coreregister_u32(target, &data.ui, 1);
+       if (mmu_enabled) {
+               virt = address;
+               retval = cortex_a_virt2phys(target, virt, &phys);
                if (retval != ERROR_OK)
                        return retval;
 
-               nbytes_to_read = 4 - start_byte;
-               if (total_bytes < nbytes_to_read)
-                       nbytes_to_read = total_bytes; 
-       
-               for (i = 0; i < nbytes_to_read; ++i)
-                       *buffer++ = data.uc_a[i + start_byte];
-                       
-               total_bytes -= nbytes_to_read;
-               start_byte = 0;
+               LOG_DEBUG("Reading at virtual address. "
+                         "Translating v:" TARGET_ADDR_FMT " to r:" TARGET_ADDR_FMT,
+                         virt, phys);
+               address = phys;
        }
 
-       return retval;
-}
-
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
+       retval = mem_ap_read_buf(armv7a->memory_ap, buffer, size, count, address);
 
-/*
- * Cortex-A8 Memory access
- *
- * This is same Cortex M3 but we must also use the correct
- * ap number for every access.
- */
+       return retval;
+}
 
-static int cortex_a8_read_phys_memory(struct target *target,
-                uint32_t address, uint32_t size,
-                uint32_t count, uint8_t *buffer)
+static int cortex_a_write_phys_memory(struct target *target,
+       target_addr_t address, uint32_t size,
+       uint32_t count, const uint8_t *buffer)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
-       int retval = ERROR_INVALID_ARGUMENTS;
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
        uint8_t apsel = swjdp->apsel;
-       LOG_DEBUG("Reading memory at real address 0x%x; size %d; count %d",
-                       address, size, count);
-
-       if (count && buffer) {
-
-               if ( apsel == swjdp_memoryap ) {
-
-                       /* read memory through AHB-AP */
-
-                       switch (size) {
-                       case 4:
-                               retval = mem_ap_sel_read_buf_u32(swjdp, swjdp_memoryap,
-                                               buffer, 4 * count, address);
-                               break;
-                       case 2:
-                               retval = mem_ap_sel_read_buf_u16(swjdp, swjdp_memoryap,
-                                               buffer, 2 * count, address);
-                               break;
-                       case 1:
-                               retval = mem_ap_sel_read_buf_u8(swjdp, swjdp_memoryap,
-                                               buffer, count, address);
-                               break;
-                       }
-               } else {
+       int retval;
+
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
+
+       LOG_DEBUG("Writing memory to real address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
+
+       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap->ap_num))
+               return mem_ap_write_buf(armv7a->memory_ap, buffer, size, count, address);
+
+       /* write memory through the CPU */
+       cortex_a_prep_memaccess(target, 1);
+       retval = cortex_a_write_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 1);
 
-                       /* read memory through APB-AP */
-                       /*  disable mmu */
-                       retval = cortex_a8_mmu_modify(target, 0);
-            if (retval != ERROR_OK) return retval;
-                       retval =  cortex_a8_read_apb_ab_memory(target, address, size, count, buffer);
-               }
-       }
        return retval;
 }
 
-static int cortex_a8_read_memory(struct target *target, uint32_t address,
-               uint32_t size, uint32_t count, uint8_t *buffer)
+static int cortex_a_write_memory(struct target *target, target_addr_t address,
+       uint32_t size, uint32_t count, const uint8_t *buffer)
 {
-       int enabled = 0;
-       uint32_t virt, phys;
        int retval;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* cortex_a8 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%x; size %d; count %d", address,
-                       size, count);
-       if (apsel == swjdp_memoryap) {
-               retval = cortex_a8_mmu(target, &enabled);
-               if (retval != ERROR_OK)
-                       return retval;
 
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Writing memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-               if(enabled)
-               {
-                       virt = address;
-                       retval = cortex_a8_virt2phys(target, virt, &phys);
-                       if (retval != ERROR_OK)
-                               return retval;
+       /* memory writes bypass the caches, must flush before writing */
+       armv7a_cache_auto_flush_on_write(target, address, size * count);
 
-                       LOG_DEBUG("Reading at virtual address. Translating v:0x%x to r:0x%x",
-                                       virt, phys);
-                       address = phys;
-               }
-               retval = cortex_a8_read_phys_memory(target, address, size, count, buffer);
-       } else {
-               retval = cortex_a8_check_address(target, address);
-        if (retval != ERROR_OK) return retval;
-               /*  enable mmu */
-               retval = cortex_a8_mmu_modify(target, 1);
-           if (retval != ERROR_OK) return retval;
-               retval = cortex_a8_read_apb_ab_memory(target, address, size, count, buffer);
-       }
+       cortex_a_prep_memaccess(target, 0);
+       retval = cortex_a_write_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 0);
        return retval;
 }
 
-static int cortex_a8_write_phys_memory(struct target *target,
-                uint32_t address, uint32_t size,
-                uint32_t count, const uint8_t *buffer)
+static int cortex_a_write_memory_ahb(struct target *target, target_addr_t address,
+       uint32_t size, uint32_t count, const uint8_t *buffer)
 {
+       int mmu_enabled = 0;
+       target_addr_t virt, phys;
+       int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
-       int retval = ERROR_INVALID_ARGUMENTS;
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
        uint8_t apsel = swjdp->apsel;
 
-       LOG_DEBUG("Writing memory to real address 0x%x; size %d; count %d", address,
-                       size, count);
-
-       if (count && buffer) {
-
-               if ( apsel == swjdp_memoryap ) {
+       if (!armv7a->memory_ap_available || (apsel != armv7a->memory_ap->ap_num))
+               return target_write_memory(target, address, size, count, buffer);
 
-                       /* write memory through AHB-AP */
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Writing memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-                       switch (size) {
-                               case 4:
-                                       retval = mem_ap_sel_write_buf_u32(swjdp, swjdp_memoryap,
-                                                       buffer, 4 * count, address);
-                                       break;
-                               case 2:
-                                       retval = mem_ap_sel_write_buf_u16(swjdp, swjdp_memoryap,
-                                                       buffer, 2 * count, address);
-                                       break;
-                               case 1:
-                                       retval = mem_ap_sel_write_buf_u8(swjdp, swjdp_memoryap,
-                                                       buffer, count, address);
-                                       break;
-                       }
+       /* determine if MMU was enabled on target stop */
+       if (!armv7a->is_armv7r) {
+               retval = cortex_a_mmu(target, &mmu_enabled);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
-               } else {
+       if (mmu_enabled) {
+               virt = address;
+               retval = cortex_a_virt2phys(target, virt, &phys);
+               if (retval != ERROR_OK)
+                       return retval;
 
-                       /* write memory through APB-AP */
-                       retval = cortex_a8_mmu_modify(target, 0);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       return  cortex_a8_write_apb_ab_memory(target, address, size, count, buffer);
-               }
+               LOG_DEBUG("Writing to virtual address. "
+                         "Translating v:" TARGET_ADDR_FMT " to r:" TARGET_ADDR_FMT,
+                         virt,
+                         phys);
+               address = phys;
        }
 
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED)
-       {
-               struct arm_dpm *dpm = armv7a->armv4_5_common.dpm;
+       retval = mem_ap_write_buf(armv7a->memory_ap, buffer, size, count, address);
 
-               retval = dpm->prepare(dpm);
-               if (retval != ERROR_OK)
-                       return retval;
+       return retval;
+}
 
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A8 has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
+static int cortex_a_read_buffer(struct target *target, target_addr_t address,
+                               uint32_t count, uint8_t *buffer)
+{
+       uint32_t size;
 
-               /* invalidate I-Cache */
-               if (armv7a->armv7a_mmu.armv7a_cache.i_cache_enabled)
-               {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = address;
-                                       cacheline < address + size * count;
-                                       cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 5, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
+       /* Align up to maximum 4 bytes. The loop condition makes sure the next pass
+        * will have something to do with the size we leave to it. */
+       for (size = 1; size < 4 && count >= size * 2 + (address & size); size *= 2) {
+               if (address & size) {
+                       int retval = cortex_a_read_memory_ahb(target, address, size, 1, buffer);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       address += size;
+                       count -= size;
+                       buffer += size;
                }
+       }
 
-               /* invalidate D-Cache */
-               if (armv7a->armv7a_mmu.armv7a_cache.d_u_cache_enabled)
-               {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = address;
-                                       cacheline < address + size * count;
-                                       cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 6, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
+       /* Read the data with as large access size as possible. */
+       for (; size > 0; size /= 2) {
+               uint32_t aligned = count - count % size;
+               if (aligned > 0) {
+                       int retval = cortex_a_read_memory_ahb(target, address, size, aligned / size, buffer);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       address += aligned;
+                       count -= aligned;
+                       buffer += aligned;
                }
-
-               /* (void) */ dpm->finish(dpm);
        }
 
-       return retval;
+       return ERROR_OK;
 }
 
-static int cortex_a8_write_memory(struct target *target, uint32_t address,
-                uint32_t size, uint32_t count, const uint8_t *buffer)
+static int cortex_a_write_buffer(struct target *target, target_addr_t address,
+                                uint32_t count, const uint8_t *buffer)
 {
-       int enabled = 0;
-       uint32_t virt, phys;
-       int retval;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
-       uint8_t apsel = swjdp->apsel;
-       /* cortex_a8 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%x; size %d; count %d", address,
-                       size, count);
-       if (apsel == swjdp_memoryap) {
+       uint32_t size;
 
-               LOG_DEBUG("Writing memory to address 0x%x; size %d; count %d", address, size, count);
-               retval = cortex_a8_mmu(target, &enabled);
-               if (retval != ERROR_OK)
-                       return retval;
-
-               if(enabled)
-               {
-                       virt = address;
-                       retval = cortex_a8_virt2phys(target, virt, &phys);
+       /* Align up to maximum 4 bytes. The loop condition makes sure the next pass
+        * will have something to do with the size we leave to it. */
+       for (size = 1; size < 4 && count >= size * 2 + (address & size); size *= 2) {
+               if (address & size) {
+                       int retval = cortex_a_write_memory_ahb(target, address, size, 1, buffer);
                        if (retval != ERROR_OK)
                                return retval;
-                       LOG_DEBUG("Writing to virtual address. Translating v:0x%x to r:0x%x", virt, phys);
-                       address = phys;
+                       address += size;
+                       count -= size;
+                       buffer += size;
                }
-
-               retval = cortex_a8_write_phys_memory(target, address, size, 
-                               count, buffer);
        }
-       else {
-               retval = cortex_a8_check_address(target, address);
-               if (retval != ERROR_OK) return retval;
-               /*  enable mmu  */
-               retval = cortex_a8_mmu_modify(target, 1);
-               if (retval != ERROR_OK) return retval;
-               retval = cortex_a8_write_apb_ab_memory(target, address, size, count, buffer);
+
+       /* Write the data with as large access size as possible. */
+       for (; size > 0; size /= 2) {
+               uint32_t aligned = count - count % size;
+               if (aligned > 0) {
+                       int retval = cortex_a_write_memory_ahb(target, address, size, aligned / size, buffer);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       address += aligned;
+                       count -= aligned;
+                       buffer += aligned;
+               }
        }
-    return retval;
-}
 
-static int cortex_a8_bulk_write_memory(struct target *target, uint32_t address,
-               uint32_t count, const uint8_t *buffer)
-{
-       return cortex_a8_write_memory(target, address, 4, count, buffer);
+       return ERROR_OK;
 }
 
-
-static int cortex_a8_handle_target_request(void *priv)
+static int cortex_a_handle_target_request(void *priv)
 {
        struct target *target = priv;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
        int retval;
 
        if (!target_was_examined(target))
@@ -2292,23 +2945,25 @@ static int cortex_a8_handle_target_request(void *priv)
        if (!target->dbg_msg_enabled)
                return ERROR_OK;
 
-       if (target->state == TARGET_RUNNING)
-       {
+       if (target->state == TARGET_RUNNING) {
                uint32_t request;
                uint32_t dscr;
-               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                                       armv7a->debug_base      + CPUDBG_DSCR, &dscr);
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
 
                /* check if we have data */
-               while ((dscr & DSCR_DTR_TX_FULL) && (retval==ERROR_OK))
-               {
-                       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                                       armv7a->debug_base+ CPUDBG_DTRTX, &request);
-                       if (retval == ERROR_OK)
-                       {
+               int64_t then = timeval_ms();
+               while ((dscr & DSCR_DTR_TX_FULL) && (retval == ERROR_OK)) {
+                       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                       armv7a->debug_base + CPUDBG_DTRTX, &request);
+                       if (retval == ERROR_OK) {
                                target_request(target, request);
-                               retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                                               armv7a->debug_base+ CPUDBG_DSCR, &dscr);
+                               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+                       }
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("Timeout waiting for dtr tx full");
+                               return ERROR_FAIL;
                        }
                }
        }
@@ -2317,237 +2972,332 @@ static int cortex_a8_handle_target_request(void *priv)
 }
 
 /*
- * Cortex-A8 target information and configuration
+ * Cortex-A target information and configuration
  */
 
-static int cortex_a8_examine_first(struct target *target)
+static int cortex_a_examine_first(struct target *target)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
+
        int i;
        int retval = ERROR_OK;
-       uint32_t didr, ctypr, ttypr, cpuid;
+       uint32_t didr, cpuid, dbg_osreg;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
-       retval = ahbap_debugport_init(swjdp);
-       if (retval != ERROR_OK)
+       /* Search for the APB-AP - it is needed for access to debug registers */
+       retval = dap_find_ap(swjdp, AP_TYPE_APB_AP, &armv7a->debug_ap);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("Could not find APB-AP for debug access");
                return retval;
+       }
 
-       if (!target->dbgbase_set)
-       {
+       retval = mem_ap_init(armv7a->debug_ap);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("Could not initialize the APB-AP");
+               return retval;
+       }
+
+       armv7a->debug_ap->memaccess_tck = 80;
+
+       /* Search for the AHB-AB.
+        * REVISIT: We should search for AXI-AP as well and make sure the AP's MEMTYPE says it
+        * can access system memory. */
+       armv7a->memory_ap_available = false;
+       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv7a->memory_ap);
+       if (retval == ERROR_OK) {
+               retval = mem_ap_init(armv7a->memory_ap);
+               if (retval == ERROR_OK)
+                       armv7a->memory_ap_available = true;
+       }
+       if (retval != ERROR_OK) {
+               /* AHB-AP not found or unavailable - use the CPU */
+               LOG_DEBUG("No AHB-AP available for memory access");
+       }
+
+       if (!target->dbgbase_set) {
                uint32_t dbgbase;
                /* Get ROM Table base */
                uint32_t apid;
-               retval = dap_get_debugbase(swjdp, 1, &dbgbase, &apid);
+               int32_t coreidx = target->coreid;
+               LOG_DEBUG("%s's dbgbase is not set, trying to detect using the ROM table",
+                         target->cmd_name);
+               retval = dap_get_debugbase(armv7a->debug_ap, &dbgbase, &apid);
                if (retval != ERROR_OK)
                        return retval;
                /* Lookup 0x15 -- Processor DAP */
-               retval = dap_lookup_cs_component(swjdp, 1, dbgbase, 0x15,
-                               &armv7a->debug_base);
-               if (retval != ERROR_OK)
+               retval = dap_lookup_cs_component(armv7a->debug_ap, dbgbase, 0x15,
+                               &armv7a->debug_base, &coreidx);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Can't detect %s's dbgbase from the ROM table; you need to specify it explicitly.",
+                                 target->cmd_name);
                        return retval;
-       }
-    else
-       {
+               }
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32,
+                         target->coreid, armv7a->debug_base);
+       } else
                armv7a->debug_base = target->dbgbase;
-       }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_CPUID, &cpuid);
-       if (retval != ERROR_OK)
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DIDR, &didr);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Examine %s failed", "DIDR");
                return retval;
+       }
 
-       if ((retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_CPUID, &cpuid)) != ERROR_OK)
-       {
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_CPUID, &cpuid);
+       if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
-       if ((retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_CTYPR, &ctypr)) != ERROR_OK)
-       {
-               LOG_DEBUG("Examine %s failed", "CTYPR");
-               return retval;
-       }
+       LOG_DEBUG("didr = 0x%08" PRIx32, didr);
+       LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
 
-       if ((retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_TTYPR, &ttypr)) != ERROR_OK)
-       {
-               LOG_DEBUG("Examine %s failed", "TTYPR");
-               return retval;
-       }
+       cortex_a->didr = didr;
+       cortex_a->cpuid = cpuid;
 
-       if ((retval = mem_ap_sel_read_atomic_u32(swjdp, swjdp_debugap,
-                       armv7a->debug_base + CPUDBG_DIDR, &didr)) != ERROR_OK)
-       {
-               LOG_DEBUG("Examine %s failed", "DIDR");
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                   armv7a->debug_base + CPUDBG_PRSR, &dbg_osreg);
+       if (retval != ERROR_OK)
                return retval;
+       LOG_DEBUG("target->coreid %" PRId32 " DBGPRSR  0x%" PRIx32, target->coreid, dbg_osreg);
+
+       if ((dbg_osreg & PRSR_POWERUP_STATUS) == 0) {
+               LOG_ERROR("target->coreid %" PRId32 " powered down!", target->coreid);
+               target->state = TARGET_UNKNOWN; /* TARGET_NO_POWER? */
+               return ERROR_TARGET_INIT_FAILED;
        }
 
-       LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
-       LOG_DEBUG("ctypr = 0x%08" PRIx32, ctypr);
-       LOG_DEBUG("ttypr = 0x%08" PRIx32, ttypr);
-       LOG_DEBUG("didr = 0x%08" PRIx32, didr);
+       if (dbg_osreg & PRSR_STICKY_RESET_STATUS)
+               LOG_DEBUG("target->coreid %" PRId32 " was reset!", target->coreid);
 
-       armv7a->armv4_5_common.core_type = ARM_MODE_MON;
-       retval = cortex_a8_dpm_setup(cortex_a8, didr);
+       /* Read DBGOSLSR and check if OSLK is implemented */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_OSLSR, &dbg_osreg);
        if (retval != ERROR_OK)
                return retval;
+       LOG_DEBUG("target->coreid %" PRId32 " DBGOSLSR 0x%" PRIx32, target->coreid, dbg_osreg);
+
+       /* check if OS Lock is implemented */
+       if ((dbg_osreg & OSLSR_OSLM) == OSLSR_OSLM0 || (dbg_osreg & OSLSR_OSLM) == OSLSR_OSLM1) {
+               /* check if OS Lock is set */
+               if (dbg_osreg & OSLSR_OSLK) {
+                       LOG_DEBUG("target->coreid %" PRId32 " OSLock set! Trying to unlock", target->coreid);
+
+                       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                                                       armv7a->debug_base + CPUDBG_OSLAR,
+                                                       0);
+                       if (retval == ERROR_OK)
+                               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                                       armv7a->debug_base + CPUDBG_OSLSR, &dbg_osreg);
+
+                       /* if we fail to access the register or cannot reset the OSLK bit, bail out */
+                       if (retval != ERROR_OK || (dbg_osreg & OSLSR_OSLK) != 0) {
+                               LOG_ERROR("target->coreid %" PRId32 " OSLock sticky, core not powered?",
+                                               target->coreid);
+                               target->state = TARGET_UNKNOWN; /* TARGET_NO_POWER? */
+                               return ERROR_TARGET_INIT_FAILED;
+                       }
+               }
+       }
+
+       armv7a->arm.core_type = ARM_MODE_MON;
+
+       /* Avoid recreating the registers cache */
+       if (!target_was_examined(target)) {
+               retval = cortex_a_dpm_setup(cortex_a, didr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
        /* Setup Breakpoint Register Pairs */
-       cortex_a8->brp_num = ((didr >> 24) & 0x0F) + 1;
-       cortex_a8->brp_num_context = ((didr >> 20) & 0x0F) + 1;
-       cortex_a8->brp_num_available = cortex_a8->brp_num;
-       cortex_a8->brp_list = calloc(cortex_a8->brp_num, sizeof(struct cortex_a8_brp));
-//     cortex_a8->brb_enabled = ????;
-       for (i = 0; i < cortex_a8->brp_num; i++)
-       {
-               cortex_a8->brp_list[i].used = 0;
-               if (i < (cortex_a8->brp_num-cortex_a8->brp_num_context))
-                       cortex_a8->brp_list[i].type = BRP_NORMAL;
+       cortex_a->brp_num = ((didr >> 24) & 0x0F) + 1;
+       cortex_a->brp_num_context = ((didr >> 20) & 0x0F) + 1;
+       cortex_a->brp_num_available = cortex_a->brp_num;
+       free(cortex_a->brp_list);
+       cortex_a->brp_list = calloc(cortex_a->brp_num, sizeof(struct cortex_a_brp));
+/*     cortex_a->brb_enabled = ????; */
+       for (i = 0; i < cortex_a->brp_num; i++) {
+               cortex_a->brp_list[i].used = 0;
+               if (i < (cortex_a->brp_num-cortex_a->brp_num_context))
+                       cortex_a->brp_list[i].type = BRP_NORMAL;
                else
-                       cortex_a8->brp_list[i].type = BRP_CONTEXT;
-               cortex_a8->brp_list[i].value = 0;
-               cortex_a8->brp_list[i].control = 0;
-               cortex_a8->brp_list[i].BRPn = i;
+                       cortex_a->brp_list[i].type = BRP_CONTEXT;
+               cortex_a->brp_list[i].value = 0;
+               cortex_a->brp_list[i].control = 0;
+               cortex_a->brp_list[i].BRPn = i;
        }
 
-       LOG_DEBUG("Configured %i hw breakpoints", cortex_a8->brp_num);
+       LOG_DEBUG("Configured %i hw breakpoints", cortex_a->brp_num);
+
+       /* select debug_ap as default */
+       swjdp->apsel = armv7a->debug_ap->ap_num;
 
        target_set_examined(target);
        return ERROR_OK;
 }
 
-static int cortex_a8_examine(struct target *target)
+static int cortex_a_examine(struct target *target)
 {
        int retval = ERROR_OK;
 
-       /* don't re-probe hardware after each reset */
-       if (!target_was_examined(target))
-               retval = cortex_a8_examine_first(target);
+       /* Reestablish communication after target reset */
+       retval = cortex_a_examine_first(target);
 
        /* Configure core debug access */
        if (retval == ERROR_OK)
-               retval = cortex_a8_init_debug_access(target);
+               retval = cortex_a_init_debug_access(target);
 
        return retval;
 }
 
 /*
- *     Cortex-A8 target creation and initialization
+ *     Cortex-A target creation and initialization
  */
 
-static int cortex_a8_init_target(struct command_context *cmd_ctx,
-               struct target *target)
+static int cortex_a_init_target(struct command_context *cmd_ctx,
+       struct target *target)
 {
        /* examine_first() does a bunch of this */
+       arm_semihosting_init(target);
        return ERROR_OK;
 }
 
-static int cortex_a8_init_arch_info(struct target *target,
-               struct cortex_a8_common *cortex_a8, struct jtag_tap *tap)
+static int cortex_a_init_arch_info(struct target *target,
+       struct cortex_a_common *cortex_a, struct adiv5_dap *dap)
 {
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct adiv5_dap *dap = &armv7a->dap;
-
-       armv7a->armv4_5_common.dap = dap;
-
-       /* Setup struct cortex_a8_common */
-       cortex_a8->common_magic = CORTEX_A8_COMMON_MAGIC;
-       /*  tap has no dap initialized */
-       if (!tap->dap)
-       {
-       armv7a->armv4_5_common.dap = dap;
-       /* Setup struct cortex_a8_common */
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
 
-       /* prepare JTAG information for the new target */
-       cortex_a8->jtag_info.tap = tap;
-       cortex_a8->jtag_info.scann_size = 4;
+       /* Setup struct cortex_a_common */
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
+       armv7a->arm.dap = dap;
 
-       /* Leave (only) generic DAP stuff for debugport_init() */
-       dap->jtag_info = &cortex_a8->jtag_info;
-
-       /* Number of bits for tar autoincrement, impl. dep. at least 10 */
-       dap->tar_autoincr_block = (1 << 10);
-       dap->memaccess_tck = 80;
-       tap->dap = dap;
-    }
-       else
-       armv7a->armv4_5_common.dap = tap->dap;
-
-       cortex_a8->fast_reg_read = 0;
+       cortex_a->fast_reg_read = 0;
 
        /* register arch-specific functions */
        armv7a->examine_debug_reason = NULL;
 
-       armv7a->post_debug_entry = cortex_a8_post_debug_entry;
+       armv7a->post_debug_entry = cortex_a_post_debug_entry;
 
        armv7a->pre_restore_context = NULL;
 
-       armv7a->armv7a_mmu.read_physical_memory = cortex_a8_read_phys_memory;
+       armv7a->armv7a_mmu.read_physical_memory = cortex_a_read_phys_memory;
+
 
-       
-//     arm7_9->handle_target_request = cortex_a8_handle_target_request;
+/*     arm7_9->handle_target_request = cortex_a_handle_target_request; */
 
        /* REVISIT v7a setup should be in a v7a-specific routine */
        armv7a_init_arch_info(target, armv7a);
-       target_register_timer_callback(cortex_a8_handle_target_request, 1, 1, target);
+       target_register_timer_callback(cortex_a_handle_target_request, 1, 1, target);
 
        return ERROR_OK;
 }
 
-static int cortex_a8_target_create(struct target *target, Jim_Interp *interp)
+static int cortex_a_target_create(struct target *target, Jim_Interp *interp)
+{
+       struct cortex_a_common *cortex_a = calloc(1, sizeof(struct cortex_a_common));
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
+       struct adiv5_private_config *pc;
+
+       if (target->private_config == NULL)
+               return ERROR_FAIL;
+
+       pc = (struct adiv5_private_config *)target->private_config;
+
+       cortex_a->armv7a_common.is_armv7r = false;
+
+       cortex_a->armv7a_common.arm.arm_vfp_version = ARM_VFP_V3;
+
+       return cortex_a_init_arch_info(target, cortex_a, pc->dap);
+}
+
+static int cortex_r4_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct cortex_a8_common *cortex_a8 = calloc(1, sizeof(struct cortex_a8_common));
+       struct cortex_a_common *cortex_a = calloc(1, sizeof(struct cortex_a_common));
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
+       struct adiv5_private_config *pc;
+
+       pc = (struct adiv5_private_config *)target->private_config;
+       if (adiv5_verify_config(pc) != ERROR_OK)
+               return ERROR_FAIL;
+
+       cortex_a->armv7a_common.is_armv7r = true;
 
-       return cortex_a8_init_arch_info(target, cortex_a8, target->tap);
+       return cortex_a_init_arch_info(target, cortex_a, pc->dap);
 }
 
+static void cortex_a_deinit_target(struct target *target)
+{
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct arm_dpm *dpm = &cortex_a->armv7a_common.dpm;
 
+       free(cortex_a->brp_list);
+       free(dpm->dbp);
+       free(dpm->dwp);
+       free(target->private_config);
+       free(cortex_a);
+}
 
-static int cortex_a8_mmu(struct target *target, int *enabled)
+static int cortex_a_mmu(struct target *target, int *enabled)
 {
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+
        if (target->state != TARGET_HALTED) {
                LOG_ERROR("%s: target not halted", __func__);
                return ERROR_TARGET_INVALID;
        }
 
-       *enabled = target_to_cortex_a8(target)->armv7a_common.armv7a_mmu.mmu_enabled;
+       if (armv7a->is_armv7r)
+               *enabled = 0;
+       else
+               *enabled = target_to_cortex_a(target)->armv7a_common.armv7a_mmu.mmu_enabled;
+
        return ERROR_OK;
 }
 
-static int cortex_a8_virt2phys(struct target *target,
-               uint32_t virt, uint32_t *phys)
+static int cortex_a_virt2phys(struct target *target,
+       target_addr_t virt, target_addr_t *phys)
 {
        int retval = ERROR_FAIL;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->armv4_5_common.dap;
+       struct adiv5_dap *swjdp = armv7a->arm.dap;
        uint8_t apsel = swjdp->apsel;
-       if (apsel == swjdp_memoryap)
-       {
+       int mmu_enabled = 0;
+
+       /*
+        * If the MMU was not enabled at debug entry, there is no
+        * way of knowing if there was ever a valid configuration
+        * for it and thus it's not safe to enable it. In this case,
+        * just return the virtual address as physical.
+        */
+       cortex_a_mmu(target, &mmu_enabled);
+       if (!mmu_enabled) {
+               *phys = virt;
+               return ERROR_OK;
+       }
+
+       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap->ap_num)) {
                uint32_t ret;
                retval = armv7a_mmu_translate_va(target,
-                                virt, &ret);
+                               virt, &ret);
                if (retval != ERROR_OK)
                        goto done;
                *phys = ret;
-       } 
-       else
-       { /*  use this method if swjdp_memoryap not selected */
-               /*  mmu must be enable in order to get a correct translation */
-               retval = cortex_a8_mmu_modify(target, 1);
-               if (retval != ERROR_OK) goto done;
-               retval = armv7a_mmu_translate_va_pa(target, virt,  phys, 1);
+       } else {/*  use this method if armv7a->memory_ap not selected
+                *  mmu must be enable in order to get a correct translation */
+               retval = cortex_a_mmu_modify(target, 1);
+               if (retval != ERROR_OK)
+                       goto done;
+               retval = armv7a_mmu_translate_va_pa(target, (uint32_t)virt,
+                                                   (uint32_t *)phys, 1);
        }
 done:
        return retval;
 }
 
-COMMAND_HANDLER(cortex_a8_handle_cache_info_command)
+COMMAND_HANDLER(cortex_a_handle_cache_info_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        struct armv7a_common *armv7a = target_to_armv7a(target);
@@ -2557,29 +3307,26 @@ COMMAND_HANDLER(cortex_a8_handle_cache_info_command)
 }
 
 
-COMMAND_HANDLER(cortex_a8_handle_dbginit_command)
+COMMAND_HANDLER(cortex_a_handle_dbginit_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       if (!target_was_examined(target))
-       {
+       if (!target_was_examined(target)) {
                LOG_ERROR("target not examined yet");
                return ERROR_FAIL;
        }
 
-       return cortex_a8_init_debug_access(target);
+       return cortex_a_init_debug_access(target);
 }
-COMMAND_HANDLER(cortex_a8_handle_smp_off_command)
+COMMAND_HANDLER(cortex_a_handle_smp_off_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-    /* check target is an smp target */
-    struct target_list *head;
+       /* check target is an smp target */
+       struct target_list *head;
        struct target *curr;
        head = target->head;
        target->smp = 0;
-       if (head != (struct target_list*)NULL)
-       {
-               while (head != (struct target_list*)NULL)
-               {
+       if (head != (struct target_list *)NULL) {
+               while (head != (struct target_list *)NULL) {
                        curr = head->target;
                        curr->smp = 0;
                        head = head->next;
@@ -2590,16 +3337,15 @@ COMMAND_HANDLER(cortex_a8_handle_smp_off_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(cortex_a8_handle_smp_on_command)
+COMMAND_HANDLER(cortex_a_handle_smp_on_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        struct target_list *head;
        struct target *curr;
        head = target->head;
-       if (head != (struct target_list*)NULL)
-       {   target->smp=1;
-               while (head != (struct target_list*)NULL)
-               {
+       if (head != (struct target_list *)NULL) {
+               target->smp = 1;
+               while (head != (struct target_list *)NULL) {
                        curr = head->target;
                        curr->smp = 1;
                        head = head->next;
@@ -2608,64 +3354,134 @@ COMMAND_HANDLER(cortex_a8_handle_smp_on_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(cortex_a8_handle_smp_gdb_command)
+COMMAND_HANDLER(cortex_a_handle_smp_gdb_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        int retval = ERROR_OK;
        struct target_list *head;
        head = target->head;
-       if (head != (struct target_list*)NULL)
-       {
-               if (CMD_ARGC == 1)
-               {
+       if (head != (struct target_list *)NULL) {
+               if (CMD_ARGC == 1) {
                        int coreid = 0;
                        COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
                        if (ERROR_OK != retval)
                                return retval;
-                       target->gdb_service->core[1]=coreid;
+                       target->gdb_service->core[1] = coreid;
+
+               }
+               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
+                       , target->gdb_service->core[1]);
+       }
+       return ERROR_OK;
+}
 
+COMMAND_HANDLER(handle_cortex_a_mask_interrupts_command)
+{
+       struct target *target = get_current_target(CMD_CTX);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = CORTEX_A_ISRMASK_OFF },
+               { .name = "on", .value = CORTEX_A_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
-               command_print(CMD_CTX, "gdb coreid  %d -> %d", target->gdb_service->core[0]
-                               , target->gdb_service->core[1]);
+
+               cortex_a->isrmasking_mode = n->value;
+       }
+
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, cortex_a->isrmasking_mode);
+       command_print(CMD_CTX, "cortex_a interrupt mask %s", n->name);
+
+       return ERROR_OK;
+}
+
+COMMAND_HANDLER(handle_cortex_a_dacrfixup_command)
+{
+       struct target *target = get_current_target(CMD_CTX);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       static const Jim_Nvp nvp_dacrfixup_modes[] = {
+               { .name = "off", .value = CORTEX_A_DACRFIXUP_OFF },
+               { .name = "on", .value = CORTEX_A_DACRFIXUP_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_dacrfixup_modes, CMD_ARGV[0]);
+               if (n->name == NULL)
+                       return ERROR_COMMAND_SYNTAX_ERROR;
+               cortex_a->dacrfixup_mode = n->value;
+
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_dacrfixup_modes, cortex_a->dacrfixup_mode);
+       command_print(CMD_CTX, "cortex_a domain access control fixup %s", n->name);
+
        return ERROR_OK;
 }
 
-static const struct command_registration cortex_a8_exec_command_handlers[] = {
+static const struct command_registration cortex_a_exec_command_handlers[] = {
        {
                .name = "cache_info",
-               .handler = cortex_a8_handle_cache_info_command,
+               .handler = cortex_a_handle_cache_info_command,
                .mode = COMMAND_EXEC,
                .help = "display information about target caches",
+               .usage = "",
        },
        {
                .name = "dbginit",
-               .handler = cortex_a8_handle_dbginit_command,
+               .handler = cortex_a_handle_dbginit_command,
                .mode = COMMAND_EXEC,
                .help = "Initialize core debug",
+               .usage = "",
        },
-       {   .name ="smp_off",
-           .handler = cortex_a8_handle_smp_off_command,
-               .mode = COMMAND_EXEC,
-               .help = "Stop smp handling",
-       },
-       {
-        .name ="smp_on",
-        .handler = cortex_a8_handle_smp_on_command,
+       {   .name = "smp_off",
+           .handler = cortex_a_handle_smp_off_command,
+           .mode = COMMAND_EXEC,
+           .help = "Stop smp handling",
+           .usage = "",},
+       {
+               .name = "smp_on",
+               .handler = cortex_a_handle_smp_on_command,
                .mode = COMMAND_EXEC,
                .help = "Restart smp handling",
+               .usage = "",
        },
        {
-        .name ="smp_gdb",
-        .handler = cortex_a8_handle_smp_gdb_command,
+               .name = "smp_gdb",
+               .handler = cortex_a_handle_smp_gdb_command,
                .mode = COMMAND_EXEC,
                .help = "display/fix current core played to gdb",
+               .usage = "",
+       },
+       {
+               .name = "maskisr",
+               .handler = handle_cortex_a_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask cortex_a interrupts",
+               .usage = "['on'|'off']",
+       },
+       {
+               .name = "dacrfixup",
+               .handler = handle_cortex_a_dacrfixup_command,
+               .mode = COMMAND_ANY,
+               .help = "set domain access control (DACR) to all-manager "
+                       "on memory access",
+               .usage = "['on'|'off']",
        },
-
 
        COMMAND_REGISTRATION_DONE
 };
-static const struct command_registration cortex_a8_command_handlers[] = {
+static const struct command_registration cortex_a_command_handlers[] = {
        {
                .chain = arm_command_handlers,
        },
@@ -2673,56 +3489,130 @@ static const struct command_registration cortex_a8_command_handlers[] = {
                .chain = armv7a_command_handlers,
        },
        {
-               .name = "cortex_a8",
+               .name = "cortex_a",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A8 command group",
-               .chain = cortex_a8_exec_command_handlers,
+               .help = "Cortex-A command group",
+               .usage = "",
+               .chain = cortex_a_exec_command_handlers,
        },
        COMMAND_REGISTRATION_DONE
 };
 
-struct target_type cortexa8_target = {
-       .name = "cortex_a8",
+struct target_type cortexa_target = {
+       .name = "cortex_a",
+       .deprecated_name = "cortex_a8",
 
-       .poll = cortex_a8_poll,
+       .poll = cortex_a_poll,
        .arch_state = armv7a_arch_state,
 
-       .target_request_data = NULL,
+       .halt = cortex_a_halt,
+       .resume = cortex_a_resume,
+       .step = cortex_a_step,
 
-       .halt = cortex_a8_halt,
-       .resume = cortex_a8_resume,
-       .step = cortex_a8_step,
-
-       .assert_reset = cortex_a8_assert_reset,
-       .deassert_reset = cortex_a8_deassert_reset,
-       .soft_reset_halt = NULL,
+       .assert_reset = cortex_a_assert_reset,
+       .deassert_reset = cortex_a_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
        .get_gdb_reg_list = arm_get_gdb_reg_list,
 
-       .read_memory = cortex_a8_read_memory,
-       .write_memory = cortex_a8_write_memory,
-       .bulk_write_memory = cortex_a8_bulk_write_memory,
+       .read_memory = cortex_a_read_memory,
+       .write_memory = cortex_a_write_memory,
+
+       .read_buffer = cortex_a_read_buffer,
+       .write_buffer = cortex_a_write_buffer,
 
        .checksum_memory = arm_checksum_memory,
        .blank_check_memory = arm_blank_check_memory,
 
        .run_algorithm = armv4_5_run_algorithm,
 
-       .add_breakpoint = cortex_a8_add_breakpoint,
-       .add_context_breakpoint = cortex_a8_add_context_breakpoint,
-       .add_hybrid_breakpoint = cortex_a8_add_hybrid_breakpoint,
-       .remove_breakpoint = cortex_a8_remove_breakpoint,
+       .add_breakpoint = cortex_a_add_breakpoint,
+       .add_context_breakpoint = cortex_a_add_context_breakpoint,
+       .add_hybrid_breakpoint = cortex_a_add_hybrid_breakpoint,
+       .remove_breakpoint = cortex_a_remove_breakpoint,
        .add_watchpoint = NULL,
        .remove_watchpoint = NULL,
 
-       .commands = cortex_a8_command_handlers,
-       .target_create = cortex_a8_target_create,
-       .init_target = cortex_a8_init_target,
-       .examine = cortex_a8_examine,
+       .commands = cortex_a_command_handlers,
+       .target_create = cortex_a_target_create,
+       .target_jim_configure = adiv5_jim_configure,
+       .init_target = cortex_a_init_target,
+       .examine = cortex_a_examine,
+       .deinit_target = cortex_a_deinit_target,
+
+       .read_phys_memory = cortex_a_read_phys_memory,
+       .write_phys_memory = cortex_a_write_phys_memory,
+       .mmu = cortex_a_mmu,
+       .virt2phys = cortex_a_virt2phys,
+};
+
+static const struct command_registration cortex_r4_exec_command_handlers[] = {
+       {
+               .name = "dbginit",
+               .handler = cortex_a_handle_dbginit_command,
+               .mode = COMMAND_EXEC,
+               .help = "Initialize core debug",
+               .usage = "",
+       },
+       {
+               .name = "maskisr",
+               .handler = handle_cortex_a_mask_interrupts_command,
+               .mode = COMMAND_EXEC,
+               .help = "mask cortex_r4 interrupts",
+               .usage = "['on'|'off']",
+       },
+
+       COMMAND_REGISTRATION_DONE
+};
+static const struct command_registration cortex_r4_command_handlers[] = {
+       {
+               .chain = arm_command_handlers,
+       },
+       {
+               .name = "cortex_r4",
+               .mode = COMMAND_ANY,
+               .help = "Cortex-R4 command group",
+               .usage = "",
+               .chain = cortex_r4_exec_command_handlers,
+       },
+       COMMAND_REGISTRATION_DONE
+};
+
+struct target_type cortexr4_target = {
+       .name = "cortex_r4",
+
+       .poll = cortex_a_poll,
+       .arch_state = armv7a_arch_state,
+
+       .halt = cortex_a_halt,
+       .resume = cortex_a_resume,
+       .step = cortex_a_step,
+
+       .assert_reset = cortex_a_assert_reset,
+       .deassert_reset = cortex_a_deassert_reset,
+
+       /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_reg_list = arm_get_gdb_reg_list,
+
+       .read_memory = cortex_a_read_phys_memory,
+       .write_memory = cortex_a_write_phys_memory,
+
+       .checksum_memory = arm_checksum_memory,
+       .blank_check_memory = arm_blank_check_memory,
+
+       .run_algorithm = armv4_5_run_algorithm,
+
+       .add_breakpoint = cortex_a_add_breakpoint,
+       .add_context_breakpoint = cortex_a_add_context_breakpoint,
+       .add_hybrid_breakpoint = cortex_a_add_hybrid_breakpoint,
+       .remove_breakpoint = cortex_a_remove_breakpoint,
+       .add_watchpoint = NULL,
+       .remove_watchpoint = NULL,
 
-       .read_phys_memory = cortex_a8_read_phys_memory,
-       .write_phys_memory = cortex_a8_write_phys_memory,
-       .mmu = cortex_a8_mmu,
-       .virt2phys = cortex_a8_virt2phys,
+       .commands = cortex_r4_command_handlers,
+       .target_create = cortex_r4_target_create,
+       .target_jim_configure = adiv5_jim_configure,
+       .init_target = cortex_a_init_target,
+       .examine = cortex_a_examine,
+       .deinit_target = cortex_a_deinit_target,
 };