From 927e53f8d5e6ebaf52be82cb7300cbbb471a92bb Mon Sep 17 00:00:00 2001 From: Evan Hunter Date: Tue, 23 Oct 2012 15:48:41 +1100 Subject: [PATCH] cortex_a : optimize apb read/write access. Rewrite: Adheres more closely to 'fast read/write' examples in TRM. up to 50x faster Change-Id: Ieb4da57d8367628f3e7306827a5b1f0ab550e641 Signed-off-by: Evan Hunter Reviewed-on: http://openocd.zylin.com/903 Tested-by: jenkins Reviewed-by: Michel JAOUEN Reviewed-by: Freddie Chopin Reviewed-by: Spencer Oliver --- src/target/arm_adi_v5.c | 43 ++++-- src/target/arm_adi_v5.h | 10 +- src/target/arm_dpm.h | 63 ++++++-- src/target/arm_opcodes.h | 30 ++++ src/target/cortex_a.c | 305 +++++++++++++++++++++++++++++++-------- src/target/cortex_m.c | 4 +- 6 files changed, 365 insertions(+), 90 deletions(-) diff --git a/src/target/arm_adi_v5.c b/src/target/arm_adi_v5.c index a0fd4cb4..b17fd410 100644 --- a/src/target/arm_adi_v5.c +++ b/src/target/arm_adi_v5.c @@ -262,16 +262,17 @@ int mem_ap_write_atomic_u32(struct adiv5_dap *dap, uint32_t address, /***************************************************************************** * * -* mem_ap_write_buf(struct adiv5_dap *dap, uint8_t *buffer, int count, uint32_t address) * +* mem_ap_write_buf(struct adiv5_dap *dap, uint8_t *buffer, int count, uint32_t address, bool addr_incr) * * * * Write a buffer in target order (little endian) * * * *****************************************************************************/ -int mem_ap_write_buf_u32(struct adiv5_dap *dap, const uint8_t *buffer, int count, uint32_t address) +int mem_ap_write_buf_u32(struct adiv5_dap *dap, const uint8_t *buffer, int count, uint32_t address, bool addr_incr) { int wcount, blocksize, writecount, errorcount = 0, retval = ERROR_OK; uint32_t adr = address; const uint8_t *pBuffer = buffer; + uint32_t incr_flag = CSW_ADDRINC_OFF; count >>= 2; wcount = count; @@ -302,7 +303,10 @@ int mem_ap_write_buf_u32(struct adiv5_dap *dap, const uint8_t *buffer, int count if (blocksize == 0) blocksize = 1; - retval = dap_setup_accessport(dap, CSW_32BIT | CSW_ADDRINC_SINGLE, address); + if (addr_incr) + incr_flag = CSW_ADDRINC_SINGLE; + + retval = dap_setup_accessport(dap, CSW_32BIT | incr_flag, address); if (retval != ERROR_OK) return retval; @@ -317,7 +321,8 @@ int mem_ap_write_buf_u32(struct adiv5_dap *dap, const uint8_t *buffer, int count retval = dap_run(dap); if (retval == ERROR_OK) { wcount = wcount - blocksize; - address = address + 4 * blocksize; + if (addr_incr) + address = address + 4 * blocksize; buffer = buffer + 4 * blocksize; } else errorcount++; @@ -547,14 +552,16 @@ extern int adi_jtag_dp_scan(struct adiv5_dap *dap, * @param buffer where the words will be stored (in host byte order). * @param count How many words to read. * @param address Memory address from which to read words; all the + * @param addr_incr if true, increment the source address for each u32 * words must be readable by the currently selected MEM-AP. */ int mem_ap_read_buf_u32(struct adiv5_dap *dap, uint8_t *buffer, - int count, uint32_t address) + int count, uint32_t address, bool addr_incr) { int wcount, blocksize, readcount, errorcount = 0, retval = ERROR_OK; uint32_t adr = address; uint8_t *pBuffer = buffer; + uint32_t incr_flag = CSW_ADDRINC_OFF; count >>= 2; wcount = count; @@ -573,7 +580,10 @@ int mem_ap_read_buf_u32(struct adiv5_dap *dap, uint8_t *buffer, if (blocksize == 0) blocksize = 1; - retval = dap_setup_accessport(dap, CSW_32BIT | CSW_ADDRINC_SINGLE, + if (addr_incr) + incr_flag = CSW_ADDRINC_SINGLE; + + retval = dap_setup_accessport(dap, CSW_32BIT | incr_flag, address); if (retval != ERROR_OK) return retval; @@ -622,7 +632,8 @@ int mem_ap_read_buf_u32(struct adiv5_dap *dap, uint8_t *buffer, return retval; } wcount = wcount - blocksize; - address += 4 * blocksize; + if (addr_incr) + address += 4 * blocksize; buffer += 4 * blocksize; } @@ -881,11 +892,18 @@ int mem_ap_sel_read_buf_u16(struct adiv5_dap *swjdp, uint8_t ap, return mem_ap_read_buf_u16(swjdp, buffer, count, address); } +int mem_ap_sel_read_buf_u32_noincr(struct adiv5_dap *swjdp, uint8_t ap, + uint8_t *buffer, int count, uint32_t address) +{ + dap_ap_select(swjdp, ap); + return mem_ap_read_buf_u32(swjdp, buffer, count, address, false); +} + int mem_ap_sel_read_buf_u32(struct adiv5_dap *swjdp, uint8_t ap, uint8_t *buffer, int count, uint32_t address) { dap_ap_select(swjdp, ap); - return mem_ap_read_buf_u32(swjdp, buffer, count, address); + return mem_ap_read_buf_u32(swjdp, buffer, count, address, true); } int mem_ap_sel_write_buf_u8(struct adiv5_dap *swjdp, uint8_t ap, @@ -906,7 +924,14 @@ int mem_ap_sel_write_buf_u32(struct adiv5_dap *swjdp, uint8_t ap, const uint8_t *buffer, int count, uint32_t address) { dap_ap_select(swjdp, ap); - return mem_ap_write_buf_u32(swjdp, buffer, count, address); + return mem_ap_write_buf_u32(swjdp, buffer, count, address, true); +} + +int mem_ap_sel_write_buf_u32_noincr(struct adiv5_dap *swjdp, uint8_t ap, + const uint8_t *buffer, int count, uint32_t address) +{ + dap_ap_select(swjdp, ap); + return mem_ap_write_buf_u32(swjdp, buffer, count, address, false); } #define MDM_REG_STAT 0x00 diff --git a/src/target/arm_adi_v5.h b/src/target/arm_adi_v5.h index 37b77712..bab3292a 100644 --- a/src/target/arm_adi_v5.h +++ b/src/target/arm_adi_v5.h @@ -374,14 +374,14 @@ int mem_ap_read_buf_u8(struct adiv5_dap *swjdp, int mem_ap_read_buf_u16(struct adiv5_dap *swjdp, uint8_t *buffer, int count, uint32_t address); int mem_ap_read_buf_u32(struct adiv5_dap *swjdp, - uint8_t *buffer, int count, uint32_t address); + uint8_t *buffer, int count, uint32_t address, bool addr_incr); int mem_ap_write_buf_u8(struct adiv5_dap *swjdp, const uint8_t *buffer, int count, uint32_t address); int mem_ap_write_buf_u16(struct adiv5_dap *swjdp, const uint8_t *buffer, int count, uint32_t address); int mem_ap_write_buf_u32(struct adiv5_dap *swjdp, - const uint8_t *buffer, int count, uint32_t address); + const uint8_t *buffer, int count, uint32_t address, bool addr_incr); /* Queued MEM-AP memory mapped single word transfers with selection of ap */ int mem_ap_sel_read_u32(struct adiv5_dap *swjdp, uint8_t ap, @@ -395,6 +395,12 @@ int mem_ap_sel_read_atomic_u32(struct adiv5_dap *swjdp, uint8_t ap, int mem_ap_sel_write_atomic_u32(struct adiv5_dap *swjdp, uint8_t ap, uint32_t address, uint32_t value); +/* Non incrementing buffer functions for accessing fifos */ +int mem_ap_sel_read_buf_u32_noincr(struct adiv5_dap *swjdp, uint8_t ap, + uint8_t *buffer, int count, uint32_t address); +int mem_ap_sel_write_buf_u32_noincr(struct adiv5_dap *swjdp, uint8_t ap, + const uint8_t *buffer, int count, uint32_t address); + /* MEM-AP memory mapped bus block transfers with selection of ap */ int mem_ap_sel_read_buf_u8(struct adiv5_dap *swjdp, uint8_t ap, uint8_t *buffer, int count, uint32_t address); diff --git a/src/target/arm_dpm.h b/src/target/arm_dpm.h index 66b2b1df..8d2638e2 100644 --- a/src/target/arm_dpm.h +++ b/src/target/arm_dpm.h @@ -140,21 +140,58 @@ int arm_dpm_write_dirty_registers(struct arm_dpm *, bool bpwp); void arm_dpm_report_wfar(struct arm_dpm *, uint32_t wfar); -/* Subset of DSCR bits; see ARMv7a arch spec section C10.3.1. +/* DSCR bits; see ARMv7a arch spec section C10.3.1. * Not all v7 bits are valid in v6. */ -#define DSCR_CORE_HALTED (1 << 0) -#define DSCR_CORE_RESTARTED (1 << 1) -#define DSCR_INT_DIS (1 << 11) -#define DSCR_ITR_EN (1 << 13) -#define DSCR_HALT_DBG_MODE (1 << 14) -#define DSCR_MON_DBG_MODE (1 << 15) -#define DSCR_INSTR_COMP (1 << 24) -#define DSCR_DTR_TX_FULL (1 << 29) -#define DSCR_DTR_RX_FULL (1 << 30) - -#define DSCR_ENTRY(dscr) (((dscr) >> 2) & 0xf) -#define DSCR_RUN_MODE(dscr) ((dscr) & (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) +#define DSCR_CORE_HALTED (0x1 << 0) +#define DSCR_CORE_RESTARTED (0x1 << 1) +#define DSCR_ENTRY_MASK (0xF << 2) +#define DSCR_STICKY_ABORT_PRECISE (0x1 << 6) +#define DSCR_STICKY_ABORT_IMPRECISE (0x1 << 7) +#define DSCR_STICKY_UNDEFINED (0x1 << 8) +#define DSCR_DBG_NOPWRDWN (0x1 << 9) /* v6 only */ +#define DSCR_DBG_ACK (0x1 << 10) +#define DSCR_INT_DIS (0x1 << 11) +#define DSCR_CP14_USR_COMMS (0x1 << 12) +#define DSCR_ITR_EN (0x1 << 13) +#define DSCR_HALT_DBG_MODE (0x1 << 14) +#define DSCR_MON_DBG_MODE (0x1 << 15) +#define DSCR_SEC_PRIV_INVASV_DIS (0x1 << 16) +#define DSCR_SEC_PRIV_NINVASV_DIS (0x1 << 17) +#define DSCR_NON_SECURE (0x1 << 18) +#define DSCR_DSCRD_IMPRECISE_ABORT (0x1 << 19) +#define DSCR_EXT_DCC_MASK (0x3 << 20) /* DTR mode */ /* bits 22, 23 are reserved */ +#define DSCR_INSTR_COMP (0x1 << 24) +#define DSCR_PIPE_ADVANCE (0x1 << 25) +#define DSCR_DTRTX_FULL_LATCHED (0x1 << 26) +#define DSCR_DTRRX_FULL_LATCHED (0x1 << 27) /* bit 28 is reserved */ +#define DSCR_DTR_TX_FULL (0x1 << 29) +#define DSCR_DTR_RX_FULL (0x1 << 30) /* bit 31 is reserved */ + +#define DSCR_ENTRY(dscr) (((dscr) >> 2) & 0xf) +#define DSCR_RUN_MODE(dscr) ((dscr) & (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) + + +/* Methods of entry into debug mode */ +#define DSCR_ENTRY_HALT_REQ (0x0 << 2) +#define DSCR_ENTRY_BREAKPOINT (0x1 << 2) +#define DSCR_ENTRY_IMPRECISE_WATCHPT (0x2 << 2) +#define DSCR_ENTRY_BKPT_INSTR (0x3 << 2) +#define DSCR_ENTRY_EXT_DBG_REQ (0x4 << 2) +#define DSCR_ENTRY_VECT_CATCH (0x5 << 2) +#define DSCR_ENTRY_D_SIDE_ABORT (0x6 << 2) /* v6 only */ +#define DSCR_ENTRY_I_SIDE_ABORT (0x7 << 2) /* v6 only */ +#define DSCR_ENTRY_OS_UNLOCK (0x8 << 2) +#define DSCR_ENTRY_PRECISE_WATCHPT (0xA << 2) + +/* DTR modes */ +#define DSCR_EXT_DCC_NON_BLOCKING (0x0 << 20) +#define DSCR_EXT_DCC_STALL_MODE (0x1 << 20) +#define DSCR_EXT_DCC_FAST_MODE (0x2 << 20) /* bits 22, 23 are reserved */ + + + + /* DRCR (debug run control register) bits */ #define DRCR_HALT (1 << 0) diff --git a/src/target/arm_opcodes.h b/src/target/arm_opcodes.h index 12a9ca8d..976cab94 100644 --- a/src/target/arm_opcodes.h +++ b/src/target/arm_opcodes.h @@ -133,6 +133,36 @@ */ #define ARMV4_5_BX(Rm) (0xe12fff10 | (Rm)) +/* Store data from coprocessor to consecutive memory + * See Armv7-A arch doc section A8.6.187 + * P: 1=index mode (offset from Rn) + * U: 1=add, 0=subtract Rn address with imm + * D: Opcode D encoding + * W: write back the offset start address to the Rn register + * CP: Coprocessor number (4 bits) + * CRd: Coprocessor source register (4 bits) + * Rn: Base register for memory address (4 bits) + * imm: Immediate value (0 - 1020, must be divisible by 4) + */ +#define ARMV4_5_STC(P, U, D, W, CP, CRd, Rn, imm) \ + (0xec000000 | ((P) << 24) | ((U) << 23) | ((D) << 22) | \ + ((W) << 21) | ((Rn) << 16) | ((CRd) << 12) | ((CP) << 8) | ((imm)>>2)) + +/* Loads data from consecutive memory to coprocessor + * See Armv7-A arch doc section A8.6.51 + * P: 1=index mode (offset from Rn) + * U: 1=add, 0=subtract Rn address with imm + * D: Opcode D encoding + * W: write back the offset start address to the Rn register + * CP: Coprocessor number (4 bits) + * CRd: Coprocessor dest register (4 bits) + * Rn: Base register for memory address (4 bits) + * imm: Immediate value (0 - 1020, must be divisible by 4) + */ +#define ARMV4_5_LDC(P, U, D, W, CP, CRd, Rn, imm) \ + (0xec100000 | ((P) << 24) | ((U) << 23) | ((D) << 22) | \ + ((W) << 21) | ((Rn) << 16) | ((CRd) << 12) | ((CP) << 8) | ((imm) >> 2)) + /* Move to ARM register from coprocessor * CP: Coprocessor number * op1: Coprocessor opcode diff --git a/src/target/cortex_a.c b/src/target/cortex_a.c index 3923b3d1..cc1552d6 100644 --- a/src/target/cortex_a.c +++ b/src/target/cortex_a.c @@ -67,6 +67,8 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target, static int cortex_a8_mmu(struct target *target, int *enabled); static int cortex_a8_virt2phys(struct target *target, uint32_t virt, uint32_t *phys); +static int cortex_a8_read_apb_ab_memory(struct target *target, + uint32_t address, uint32_t size, uint32_t count, uint8_t *buffer); /* * FIXME do topology discovery using the ROM; don't @@ -1787,127 +1789,302 @@ static int cortex_a8_write_apb_ab_memory(struct target *target, int retval = ERROR_COMMAND_SYNTAX_ERROR; struct armv7a_common *armv7a = target_to_armv7a(target); struct arm *arm = &armv7a->arm; + struct adiv5_dap *swjdp = armv7a->arm.dap; int total_bytes = count * size; - int start_byte, nbytes_to_write, i; + int total_u32; + int start_byte = address & 0x3; + int end_byte = (address + total_bytes) & 0x3; struct reg *reg; - union _data { - uint8_t uc_a[4]; - uint32_t ui; - } data; + uint32_t dscr; + uint8_t *tmp_buff = NULL; if (target->state != TARGET_HALTED) { LOG_WARNING("target not halted"); return ERROR_TARGET_NOT_HALTED; } + total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4); + + /* Mark register R0 as dirty, as it will be used + * for transferring the data. + * It will be restored automatically when exiting + * debug mode + */ reg = arm_reg_current(arm, 0); - reg->dirty = 1; - reg = arm_reg_current(arm, 1); - reg->dirty = 1; + reg->dirty = true; - retval = cortex_a8_dap_write_coreregister_u32(target, address & 0xFFFFFFFC, 0); + /* clear any abort */ + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, armv7a->debug_base + CPUDBG_DRCR, 1<<2); if (retval != ERROR_OK) return retval; - start_byte = address & 0x3; + /* This algorithm comes from either : + * Cortex-A8 TRM Example 12-25 + * Cortex-R4 TRM Example 11-26 + * (slight differences) + */ - while (total_bytes > 0) { + /* The algorithm only copies 32 bit words, so the buffer + * should be expanded to include the words at either end. + * The first and last words will be read first to avoid + * corruption if needed. + */ + tmp_buff = (uint8_t *) malloc(total_u32 << 2); - nbytes_to_write = 4 - start_byte; - if (total_bytes < nbytes_to_write) - nbytes_to_write = total_bytes; - if (nbytes_to_write != 4) { + if ((start_byte != 0) && (total_u32 > 1)) { + /* First bytes not aligned - read the 32 bit word to avoid corrupting + * the other bytes in the word. + */ + retval = cortex_a8_read_apb_ab_memory(target, (address & ~0x3), 4, 1, tmp_buff); + if (retval != ERROR_OK) + goto error_free_buff_w; + } - /* execute instruction LDR r1, [r0] */ - retval = cortex_a8_exec_opcode(target, ARMV4_5_LDR(1, 0), NULL); - if (retval != ERROR_OK) - return retval; + /* If end of write is not aligned, or the write is less than 4 bytes */ + if ((end_byte != 0) || + ((total_u32 == 1) && (total_bytes != 4))) { - retval = cortex_a8_dap_read_coreregister_u32(target, &data.ui, 1); - if (retval != ERROR_OK) - return retval; - } + /* Read the last word to avoid corruption during 32 bit write */ + int mem_offset = (total_u32-1) << 4; + retval = cortex_a8_read_apb_ab_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]); + if (retval != ERROR_OK) + goto error_free_buff_w; + } - for (i = 0; i < nbytes_to_write; ++i) - data.uc_a[i + start_byte] = *buffer++; + /* Copy the write buffer over the top of the temporary buffer */ + memcpy(&tmp_buff[start_byte], buffer, total_bytes); - retval = cortex_a8_dap_write_coreregister_u32(target, data.ui, 1); - if (retval != ERROR_OK) - return retval; + /* We now have a 32 bit aligned buffer that can be written */ - /* execute instruction STRW r1, [r0], 1 (0xe4801004) */ - retval = cortex_a8_exec_opcode(target, ARMV4_5_STRW_IP(1, 0), NULL); - if (retval != ERROR_OK) - return retval; + /* Read DSCR */ + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); + if (retval != ERROR_OK) + goto error_free_buff_w; + + /* Set DTR mode to Fast (2) */ + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_FAST_MODE; + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); + if (retval != ERROR_OK) + goto error_free_buff_w; + + /* Copy the destination address into R0 */ + /* - pend an instruction MRC p14, 0, R0, c5, c0 */ + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_ITR, ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); + if (retval != ERROR_OK) + goto error_unset_dtr_w; + /* Write address into DTRRX, which triggers previous instruction */ + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DTRRX, address & (~0x3)); + if (retval != ERROR_OK) + goto error_unset_dtr_w; + + /* Write the data transfer instruction into the ITR + * (STC p14, c5, [R0], 4) + */ + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_ITR, ARMV4_5_STC(0, 1, 0, 1, 14, 5, 0, 4)); + if (retval != ERROR_OK) + goto error_unset_dtr_w; + + /* Do the write */ + retval = mem_ap_sel_write_buf_u32_noincr(swjdp, armv7a->debug_ap, + tmp_buff, (total_u32)<<2, armv7a->debug_base + CPUDBG_DTRRX); + if (retval != ERROR_OK) + goto error_unset_dtr_w; + + + /* Switch DTR mode back to non-blocking (0) */ + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING; + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); + if (retval != ERROR_OK) + goto error_unset_dtr_w; - total_bytes -= nbytes_to_write; - start_byte = 0; + /* Check for sticky abort flags in the DSCR */ + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); + if (retval != ERROR_OK) + goto error_free_buff_w; + if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) { + /* Abort occurred - clear it and exit */ + LOG_ERROR("abort occurred - dscr = 0x%08x", dscr); + mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DRCR, 1<<2); + goto error_free_buff_w; } - return retval; -} + /* Done */ + free(tmp_buff); + return ERROR_OK; +error_unset_dtr_w: + /* Unset DTR mode */ + mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING; + mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); +error_free_buff_w: + LOG_ERROR("error"); + free(tmp_buff); + return ERROR_FAIL; +} static int cortex_a8_read_apb_ab_memory(struct target *target, uint32_t address, uint32_t size, uint32_t count, uint8_t *buffer) { - /* read memory through APB-AP */ int retval = ERROR_COMMAND_SYNTAX_ERROR; struct armv7a_common *armv7a = target_to_armv7a(target); + struct adiv5_dap *swjdp = armv7a->arm.dap; struct arm *arm = &armv7a->arm; int total_bytes = count * size; - int start_byte, nbytes_to_read, i; + int total_u32; + int start_byte = address & 0x3; struct reg *reg; - union _data { - uint8_t uc_a[4]; - uint32_t ui; - } data; - + uint32_t dscr; + char *tmp_buff = NULL; + uint32_t buff32[2]; if (target->state != TARGET_HALTED) { LOG_WARNING("target not halted"); return ERROR_TARGET_NOT_HALTED; } + total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4); + + /* Mark register R0 as dirty, as it will be used + * for transferring the data. + * It will be restored automatically when exiting + * debug mode + */ reg = arm_reg_current(arm, 0); - reg->dirty = 1; - reg = arm_reg_current(arm, 1); - reg->dirty = 1; + reg->dirty = true; - retval = cortex_a8_dap_write_coreregister_u32(target, address & 0xFFFFFFFC, 0); + /* clear any abort */ + retval = + mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, armv7a->debug_base + CPUDBG_DRCR, 1<<2); if (retval != ERROR_OK) return retval; - start_byte = address & 0x3; + /* Read DSCR */ + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); - while (total_bytes > 0) { + /* This algorithm comes from either : + * Cortex-A8 TRM Example 12-24 + * Cortex-R4 TRM Example 11-25 + * (slight differences) + */ + + /* Set DTR access mode to stall mode b01 */ + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_STALL_MODE; + retval += mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); + + /* Write R0 with value 'address' using write procedure for stall mode */ + /* - Write the address for read access into DTRRX */ + retval += mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DTRRX, address & ~0x3); + /* - Copy value from DTRRX to R0 using instruction mrc p14, 0, r0, c5, c0 */ + cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr); + + + /* Write the data transfer instruction (ldc p14, c5, [r0],4) + * and the DTR mode setting to fast mode + * in one combined write (since they are adjacent registers) + */ + buff32[0] = ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4); + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_FAST_MODE; + buff32[1] = dscr; + /* group the 2 access CPUDBG_ITR 0x84 and CPUDBG_DSCR 0x88 */ + retval += mem_ap_sel_write_buf_u32(swjdp, armv7a->debug_ap, (uint8_t *)buff32, 8, + armv7a->debug_base + CPUDBG_ITR); + if (retval != ERROR_OK) + goto error_unset_dtr_r; + + + /* Due to offset word alignment, the buffer may not have space + * to read the full first and last int32 words, + * hence, malloc space to read into, then copy and align into the buffer. + */ + tmp_buff = (char *) malloc(total_u32<<2); - /* execute instruction LDRW r1, [r0], 4 (0xe4901004) */ - retval = cortex_a8_exec_opcode(target, ARMV4_5_LDRW_IP(1, 0), NULL); + /* The last word needs to be handled separately - read all other words in one go. + */ + if (total_u32 > 1) { + /* Read the data - Each read of the DTRTX register causes the instruction to be reissued + * Abort flags are sticky, so can be read at end of transactions + * + * This data is read in aligned to 32 bit boundary, hence may need shifting later. + */ + retval = mem_ap_sel_read_buf_u32_noincr(swjdp, armv7a->debug_ap, (uint8_t *)tmp_buff, (total_u32-1)<<2, + armv7a->debug_base + CPUDBG_DTRTX); if (retval != ERROR_OK) - return retval; + goto error_unset_dtr_r; + } - retval = cortex_a8_dap_read_coreregister_u32(target, &data.ui, 1); + /* set DTR access mode back to non blocking b00 */ + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING; + retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); + if (retval != ERROR_OK) + goto error_free_buff_r; + + /* Wait for the final read instruction to finish */ + do { + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); if (retval != ERROR_OK) - return retval; + goto error_free_buff_r; + } while ((dscr & DSCR_INSTR_COMP) == 0); + - nbytes_to_read = 4 - start_byte; - if (total_bytes < nbytes_to_read) - nbytes_to_read = total_bytes; + /* Check for sticky abort flags in the DSCR */ + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); + if (retval != ERROR_OK) + goto error_free_buff_r; + if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) { + /* Abort occurred - clear it and exit */ + LOG_ERROR("abort occurred - dscr = 0x%08x", dscr); + mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DRCR, 1<<2); + goto error_free_buff_r; + } + + /* Read the last word */ + retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DTRTX, (uint32_t *)&tmp_buff[(total_u32-1)<<2]); + if (retval != ERROR_OK) + goto error_free_buff_r; - for (i = 0; i < nbytes_to_read; ++i) - *buffer++ = data.uc_a[i + start_byte]; + /* Copy and align the data into the output buffer */ + memcpy(buffer, &tmp_buff[start_byte], total_bytes); - total_bytes -= nbytes_to_read; - start_byte = 0; - } + free(tmp_buff); + + /* Done */ + return ERROR_OK; - return retval; -} +error_unset_dtr_r: + /* Unset DTR mode */ + mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, &dscr); + dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING; + mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, + armv7a->debug_base + CPUDBG_DSCR, dscr); +error_free_buff_r: + LOG_ERROR("error"); + free(tmp_buff); + return ERROR_FAIL; +} /* diff --git a/src/target/cortex_m.c b/src/target/cortex_m.c index 3a823fe6..1d645c67 100644 --- a/src/target/cortex_m.c +++ b/src/target/cortex_m.c @@ -1620,7 +1620,7 @@ static int cortex_m3_read_memory(struct target *target, uint32_t address, if (count && buffer) { switch (size) { case 4: - retval = mem_ap_read_buf_u32(swjdp, buffer, 4 * count, address); + retval = mem_ap_read_buf_u32(swjdp, buffer, 4 * count, address, true); break; case 2: retval = mem_ap_read_buf_u16(swjdp, buffer, 2 * count, address); @@ -1650,7 +1650,7 @@ static int cortex_m3_write_memory(struct target *target, uint32_t address, if (count && buffer) { switch (size) { case 4: - retval = mem_ap_write_buf_u32(swjdp, buffer, 4 * count, address); + retval = mem_ap_write_buf_u32(swjdp, buffer, 4 * count, address, true); break; case 2: retval = mem_ap_write_buf_u16(swjdp, buffer, 2 * count, address); -- 2.39.5