From a89dd2ca6542d157890656d918d6ae2f933ab401 Mon Sep 17 00:00:00 2001 From: oharboe Date: Thu, 3 Sep 2009 08:23:39 +0000 Subject: [PATCH] David Brownell Abstract the orion_nand_fast_block_write() routine into a separate routine -- arm_nandwrite() -- so that other ARM cores can reuse it. Have davinci_nand do so. This is faster than byte-at-a-time ops by a factor of three (!), even given the slowish interactions to support hardware ECC (1-bit flavor in that test) each 512 bytes; those could be read more efficiently by on-chip code. NOTE that until there's a generic "ARM algorithm" structure, this can't work on newer ARMv6 (like ARM1136) or ARMv7A (like Cortex-A8) cores, though the downloaded code itself would work just fine there. git-svn-id: svn://svn.berlios.de/openocd/trunk@2663 b42882b7-edfa-0310-969c-e2dbd0fdcd60 --- src/flash/Makefile.am | 2 + src/flash/arm_nandio.c | 131 +++++++++++++++++++++++++++++++++++++++ src/flash/arm_nandio.h | 25 ++++++++ src/flash/davinci_nand.c | 23 ++++++- src/flash/nand.h | 1 + src/flash/orion_nand.c | 83 ++++--------------------- 6 files changed, 191 insertions(+), 74 deletions(-) create mode 100644 src/flash/arm_nandio.c create mode 100644 src/flash/arm_nandio.h diff --git a/src/flash/Makefile.am b/src/flash/Makefile.am index 60a322aa..bf39b2e4 100644 --- a/src/flash/Makefile.am +++ b/src/flash/Makefile.am @@ -6,6 +6,7 @@ AM_CPPFLAGS = \ METASOURCES = AUTO noinst_LTLIBRARIES = libflash.la libflash_la_SOURCES = \ + arm_nandio.c \ flash.c \ lpc2000.c \ cfi.c \ @@ -38,6 +39,7 @@ libflash_la_SOURCES = \ avrf.c noinst_HEADERS = \ + arm_nandio.h \ flash.h \ lpc2000.h \ cfi.h \ diff --git a/src/flash/arm_nandio.c b/src/flash/arm_nandio.c new file mode 100644 index 00000000..fb501e56 --- /dev/null +++ b/src/flash/arm_nandio.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2009 by Marvell Semiconductors, Inc. 
+ * Written by Nicolas Pitre + * + * Copyright (C) 2009 by David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "arm_nandio.h" +#include "armv4_5.h" + + +/* + * ARM-specific bulk write from buffer to address of 8-bit wide NAND. + * For now this only supports ARMv4 and ARMv5 cores. 
+ * + * Enhancements to target_run_algorithm() could enable: + * - faster writes: on ARMv5+ don't setup/teardown hardware breakpoint + * - ARMv6 and ARMv7 cores in ARM mode + * + * Different code fragments could handle: + * - Thumb2 cores like Cortex-M (needs different byteswapping) + * - 16-bit wide data (needs different setup too) + */ +int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size) +{ + target_t *target = nand->target; + armv4_5_algorithm_t algo; + reg_param_t reg_params[3]; + uint32_t target_buf; + int retval; + + /* Inputs: + * r0 NAND data address (byte wide) + * r1 buffer address + * r2 buffer length + */ + static const uint32_t code[] = { + 0xe4d13001, /* s: ldrb r3, [r1], #1 */ + 0xe5c03000, /* strb r3, [r0] */ + 0xe2522001, /* subs r2, r2, #1 */ + 0x1afffffb, /* bne s */ + + /* exit: ARMv4 needs hardware breakpoint */ + 0xe1200070, /* e: bkpt #0 */ + }; + + if (!nand->copy_area) { + uint8_t code_buf[sizeof(code)]; + unsigned i; + + /* make sure we have a working area */ + if (target_alloc_working_area(target, + sizeof(code) + nand->chunk_size, + &nand->copy_area) != ERROR_OK) { + LOG_DEBUG("%s: no %d byte buffer", + __FUNCTION__, + (int) sizeof(code) + nand->chunk_size); + return ERROR_NAND_NO_BUFFER; + } + + /* buffer code in target endianness */ + for (i = 0; i < sizeof(code) / 4; i++) + target_buffer_set_u32(target, code_buf + i * 4, code[i]); + + /* copy code to work area */ + retval = target_write_memory(target, + nand->copy_area->address, + 4, sizeof(code) / 4, code_buf); + if (retval != ERROR_OK) + return retval; + } + + /* copy data to work area */ + target_buf = nand->copy_area->address + sizeof(code); + retval = target_bulk_write_memory(target, target_buf, size / 4, data); + if (retval == ERROR_OK && (size & 3) != 0) + retval = target_write_memory(target, + target_buf + (size & ~3), + 1, size & 3, data + (size & ~3)); + if (retval != ERROR_OK) + return retval; + + /* set up algorithm and parameters */ + algo.common_magic 
= ARMV4_5_COMMON_MAGIC; + algo.core_mode = ARMV4_5_MODE_SVC; + algo.core_state = ARMV4_5_STATE_ARM; + + init_reg_param(&reg_params[0], "r0", 32, PARAM_IN); + init_reg_param(&reg_params[1], "r1", 32, PARAM_IN); + init_reg_param(&reg_params[2], "r2", 32, PARAM_IN); + + buf_set_u32(reg_params[0].value, 0, 32, nand->data); + buf_set_u32(reg_params[1].value, 0, 32, target_buf); + buf_set_u32(reg_params[2].value, 0, 32, size); + + /* use alg to write data from work area to NAND chip */ + retval = target_run_algorithm(target, 0, NULL, 3, reg_params, + nand->copy_area->address, + nand->copy_area->address + sizeof(code) - 4, + 1000, &algo); + if (retval != ERROR_OK) + LOG_ERROR("error executing hosted NAND write"); + + destroy_reg_param(&reg_params[0]); + destroy_reg_param(&reg_params[1]); + destroy_reg_param(&reg_params[2]); + + return retval; +} + +/* REVISIT do the same for bulk *read* too ... */ + diff --git a/src/flash/arm_nandio.h b/src/flash/arm_nandio.h new file mode 100644 index 00000000..eedf5dca --- /dev/null +++ b/src/flash/arm_nandio.h @@ -0,0 +1,25 @@ +#ifndef __ARM_NANDIO_H +#define __ARM_NANDIO_H + +#include "nand.h" +#include "binarybuffer.h" + +struct arm_nand_data { + /* target is proxy for some ARM core */ + struct target_s *target; + + /* copy_area holds write-to-NAND loop and data to write */ + struct working_area_s *copy_area; + + /* chunk_size == page or ECC unit */ + unsigned chunk_size; + + /* data == where to write the data */ + uint32_t data; + + /* currently implicit: data width == 8 bits (not 16) */ +}; + +int arm_nandwrite(struct arm_nand_data *nand, uint8_t *data, int size); + +#endif /* __ARM_NANDIO_H */ diff --git a/src/flash/davinci_nand.c b/src/flash/davinci_nand.c index 29c96300..41c2b20a 100644 --- a/src/flash/davinci_nand.c +++ b/src/flash/davinci_nand.c @@ -28,7 +28,7 @@ #include "config.h" #endif -#include "nand.h" +#include "arm_nandio.h" enum ecc { @@ -51,6 +51,9 @@ struct davinci_nand { uint32_t cmd; /* with CLE */ uint32_t addr; /* with ALE */ + 
/* write acceleration */ + struct arm_nand_data io; + /* page i/o for the relevant flavor of hardware ECC */ int (*read_page)(struct nand_device_s *nand, uint32_t page, uint8_t *data, uint32_t data_size, uint8_t *oob, uint32_t oob_size); @@ -181,7 +184,7 @@ static int davinci_read_data(struct nand_device_s *nand, void *data) return ERROR_OK; } -/* REVISIT a bit of native code should let block I/O be MUCH faster */ +/* REVISIT a bit of native code should let block reads be MUCH faster */ static int davinci_read_block_data(struct nand_device_s *nand, uint8_t *data, int data_size) @@ -223,10 +226,17 @@ static int davinci_write_block_data(struct nand_device_s *nand, target_t *target = info->target; uint32_t nfdata = info->data; uint32_t tmp; + int status; if (!halted(target, "write_block")) return ERROR_NAND_OPERATION_FAILED; + /* try the fast way first */ + status = arm_nandwrite(&info->io, data, data_size); + if (status != ERROR_NAND_NO_BUFFER) + return status; + + /* else do it slowly */ while (data_size >= 4) { tmp = le_to_h_u32(data); target_write_u32(target, nfdata, tmp); @@ -285,6 +295,12 @@ static int davinci_write_page(struct nand_device_s *nand, uint32_t page, memset(oob, 0x0ff, oob_size); } + /* REVISIT avoid wasting SRAM: unless nand->use_raw is set, + * use 512 byte chunks. Read side support will often want + * to include oob_size ... + */ + info->io.chunk_size = nand->page_size; + status = info->write_page(nand, page, data, data_size, oob, oob_size); free(ooballoc); return status; @@ -700,6 +716,9 @@ static int davinci_nand_device_command(struct command_context_s *cmd_ctx, nand->controller_priv = info; + info->io.target = target; + info->io.data = info->data; + /* NOTE: for now we don't do any error correction on read. * Nothing else in OpenOCD currently corrects read errors, * and in any case it's *writing* that we care most about. 
diff --git a/src/flash/nand.h b/src/flash/nand.h index ab87123d..b73e3304 100644 --- a/src/flash/nand.h +++ b/src/flash/nand.h @@ -223,5 +223,6 @@ extern int nand_init(struct command_context_s *cmd_ctx); #define ERROR_NAND_OPERATION_NOT_SUPPORTED (-1103) #define ERROR_NAND_DEVICE_NOT_PROBED (-1104) #define ERROR_NAND_ERROR_CORRECTION_FAILED (-1105) +#define ERROR_NAND_NO_BUFFER (-1106) #endif /* NAND_H */ diff --git a/src/flash/orion_nand.c b/src/flash/orion_nand.c index dc9d78c5..94df17ba 100644 --- a/src/flash/orion_nand.c +++ b/src/flash/orion_nand.c @@ -26,15 +26,15 @@ #include "config.h" #endif -#include "nand.h" +#include "arm_nandio.h" #include "armv4_5.h" -#include "binarybuffer.h" typedef struct orion_nand_controller_s { struct target_s *target; - working_area_t *copy_area; + + struct arm_nand_data io; uint32_t cmd; uint32_t addr; @@ -99,78 +99,14 @@ static int orion_nand_slow_block_write(struct nand_device_s *device, uint8_t *da static int orion_nand_fast_block_write(struct nand_device_s *device, uint8_t *data, int size) { orion_nand_controller_t *hw = device->controller_priv; - target_t *target = hw->target; - armv4_5_algorithm_t algo; - reg_param_t reg_params[3]; - uint32_t target_buf; int retval; - static const uint32_t code[] = { - 0xe4d13001, /* ldrb r3, [r1], #1 */ - 0xe5c03000, /* strb r3, [r0] */ - 0xe2522001, /* subs r2, r2, #1 */ - 0x1afffffb, /* bne 0 */ - 0xeafffffe, /* b . 
*/ - }; - int code_size = sizeof(code); - - if (!hw->copy_area) { - uint8_t code_buf[code_size]; - int i; - - /* make sure we have a working area */ - if (target_alloc_working_area(target, - code_size + device->page_size, - &hw->copy_area) != ERROR_OK) - { - return orion_nand_slow_block_write(device, data, size); - } - - /* copy target instructions to target endianness */ - for (i = 0; i < code_size/4; i++) - target_buffer_set_u32(target, code_buf + i*4, code[i]); - - /* write code to working area */ - retval = target_write_memory(target, - hw->copy_area->address, - 4, code_size/4, code_buf); - if (retval != ERROR_OK) - return retval; - } + hw->io.chunk_size = device->page_size; + + retval = arm_nandwrite(&hw->io, data, size); + if (retval == ERROR_NAND_NO_BUFFER) + retval = orion_nand_slow_block_write(device, data, size); - /* copy data to target's memory */ - target_buf = hw->copy_area->address + code_size; - retval = target_bulk_write_memory(target, target_buf, size/4, data); - if (retval == ERROR_OK && size & 3) { - retval = target_write_memory(target, - target_buf + (size & ~3), - 1, size & 3, data + (size & ~3)); - } - if (retval != ERROR_OK) - return retval; - - algo.common_magic = ARMV4_5_COMMON_MAGIC; - algo.core_mode = ARMV4_5_MODE_SVC; - algo.core_state = ARMV4_5_STATE_ARM; - - init_reg_param(&reg_params[0], "r0", 32, PARAM_IN); - init_reg_param(&reg_params[1], "r1", 32, PARAM_IN); - init_reg_param(&reg_params[2], "r2", 32, PARAM_IN); - - buf_set_u32(reg_params[0].value, 0, 32, hw->data); - buf_set_u32(reg_params[1].value, 0, 32, target_buf); - buf_set_u32(reg_params[2].value, 0, 32, size); - - retval = target_run_algorithm(target, 0, NULL, 3, reg_params, - hw->copy_area->address, - hw->copy_area->address + code_size - 4, - 1000, &algo); - if (retval != ERROR_OK) - LOG_ERROR("error executing hosted NAND write"); - - destroy_reg_param(&reg_params[0]); - destroy_reg_param(&reg_params[1]); - destroy_reg_param(&reg_params[2]); return retval; } @@ -224,6 +160,9 @@ int 
orion_nand_device_command(struct command_context_s *cmd_ctx, char *cmd, hw->cmd = base + (1 << cle); hw->addr = base + (1 << ale); + hw->io.target = hw->target; + hw->io.data = hw->data; + return ERROR_OK; } -- 2.39.5