void arm_set_cpsr(struct arm *arm, uint32_t cpsr);
 struct reg *arm_reg_current(struct arm *arm, unsigned regnum);
 
-void arm_endianness(uint8_t *tmp, void *in, int size, int be, int flip);
-
 extern struct reg arm_gdb_dummy_fp_reg;
 extern struct reg arm_gdb_dummy_fps_reg;
 
 
        return ERROR_OK;
 }
 
+int arm7_9_endianness_callback(jtag_callback_data_t pu8_in,
+               jtag_callback_data_t i_size, jtag_callback_data_t i_be,
+               jtag_callback_data_t i_flip)
+{
+       uint8_t *in = (uint8_t *)pu8_in;
+       int size = (int)i_size;
+       int be = (int)i_be;
+       int flip = (int)i_flip;
+       uint32_t readback;
+
+       switch (size) {
+       case 4:
+               readback = le_to_h_u32(in);
+               if (flip)
+                       readback = flip_u32(readback, 32);
+               if (be)
+                       h_u32_to_be(in, readback);
+               else
+                       h_u32_to_le(in, readback);
+               break;
+       case 2:
+               readback = le_to_h_u16(in);
+               if (flip)
+                       readback = flip_u32(readback, 16);
+               if (be)
+                       h_u16_to_be(in, readback & 0xffff);
+               else
+                       h_u16_to_le(in, readback & 0xffff);
+               break;
+       case 1:
+               readback = *in;
+               if (flip)
+                       readback = flip_u32(readback, 8);
+               *in = readback & 0xff;
+               break;
+       }
+
+       return ERROR_OK;
+}
+
 COMMAND_HANDLER(handle_arm7_9_dbgrq_command)
 {
        struct target *target = get_current_target(CMD_CTX);
 
 int arm7_9_examine(struct target *target);
 int arm7_9_check_reset(struct target *target);
 
+int arm7_9_endianness_callback(jtag_callback_data_t pu8_in,
+               jtag_callback_data_t i_size, jtag_callback_data_t i_be,
+               jtag_callback_data_t i_flip);
+
 #endif /* ARM7_9_COMMON_H */
 
        return ERROR_OK;
 }
 
-void arm_endianness(uint8_t *tmp, void *in, int size, int be, int flip)
-{
-       uint32_t readback = le_to_h_u32(tmp);
-       if (flip)
-               readback = flip_u32(readback, 32);
-       switch (size) {
-               case 4:
-                       if (be)
-                               h_u32_to_be(((uint8_t *)in), readback);
-                       else
-                                h_u32_to_le(((uint8_t *)in), readback);
-                       break;
-               case 2:
-                       if (be)
-                               h_u16_to_be(((uint8_t *)in), readback & 0xffff);
-                       else
-                               h_u16_to_le(((uint8_t *)in), readback & 0xffff);
-                       break;
-               case 1:
-                       *((uint8_t *)in) = readback & 0xff;
-                       break;
-       }
-}
-
-static int arm7endianness(jtag_callback_data_t arg,
-       jtag_callback_data_t size, jtag_callback_data_t be,
-       jtag_callback_data_t captured)
-{
-       uint8_t *in = (uint8_t *)arg;
-
-       arm_endianness((uint8_t *)captured, in, (int)size, (int)be, 1);
-       return ERROR_OK;
-}
-
 /* clock the target, and read the databus
  * the *in pointer points to a buffer where elements of 'size' bytes
  * are stored in big (be == 1) or little (be == 0) endianness
                void *in, int size, int be)
 {
        int retval = ERROR_OK;
-       struct scan_field fields[2];
+       struct scan_field fields[3];
 
        retval = arm_jtag_scann(jtag_info, 0x1, TAP_DRPAUSE);
        if (retval != ERROR_OK)
        fields[0].out_value = NULL;
        fields[0].in_value = NULL;
 
-       fields[1].num_bits = size * 8;
-       fields[1].out_value = NULL;
-       fields[1].in_value = in;
+       if (size == 4) {
+               fields[1].num_bits = 32;
+               fields[1].out_value = NULL;
+               fields[1].in_value = in;
+       } else {
+               /* Discard irrelevant bits of the scan, making sure we don't write more
+                * than size bytes to in */
+               fields[1].num_bits = 32 - size * 8;
+               fields[1].out_value = NULL;
+               fields[1].in_value = NULL;
 
-       jtag_add_dr_scan(jtag_info->tap, 2, fields, TAP_DRPAUSE);
+               fields[2].num_bits = size * 8;
+               fields[2].out_value = NULL;
+               fields[2].in_value = in;
+       }
+
+       jtag_add_dr_scan(jtag_info->tap, size == 4 ? 2 : 3, fields, TAP_DRPAUSE);
 
-       jtag_add_callback4(arm7endianness,
+       jtag_add_callback4(arm7_9_endianness_callback,
                (jtag_callback_data_t)in,
                (jtag_callback_data_t)size,
                (jtag_callback_data_t)be,
-               (jtag_callback_data_t)in);
+               (jtag_callback_data_t)1);
 
        jtag_add_runtest(0, TAP_DRPAUSE);
 
 
        return ERROR_OK;
 }
 
-static int arm9endianness(jtag_callback_data_t arg,
-       jtag_callback_data_t size, jtag_callback_data_t be,
-       jtag_callback_data_t captured)
-{
-       uint8_t *in = (uint8_t *)arg;
-
-       arm_endianness((uint8_t *)captured, in, (int)size, (int)be, 0);
-       return ERROR_OK;
-}
-
 /* clock the target, and read the databus
  * the *in pointer points to a buffer where elements of 'size' bytes
  * are stored in big (be == 1) or little (be == 0) endianness
                void *in, int size, int be)
 {
        int retval = ERROR_OK;
-       struct scan_field fields[3];
+       struct scan_field fields[2];
 
        retval = arm_jtag_scann(jtag_info, 0x1, TAP_DRPAUSE);
        if (retval != ERROR_OK)
        if (retval != ERROR_OK)
                return retval;
 
-       fields[0].num_bits = size * 8;
-       fields[0].out_value = NULL;
-       fields[0].in_value = in;
+       if (size == 4) {
+               fields[0].num_bits = 32;
+               fields[0].out_value = NULL;
+               fields[0].in_value = in;
 
-       fields[1].num_bits = 3;
-       fields[1].out_value = NULL;
-       fields[1].in_value = NULL;
+               fields[1].num_bits = 3 + 32;
+               fields[1].out_value = NULL;
+               fields[1].in_value = NULL;
+       } else {
+               /* Discard irrelevant bits of the scan, making sure we don't write more
+                * than size bytes to in */
+               fields[0].num_bits = size * 8;
+               fields[0].out_value = NULL;
+               fields[0].in_value = in;
 
-       fields[2].num_bits = 32;
-       fields[2].out_value = NULL;
-       fields[2].in_value = NULL;
+               fields[1].num_bits = 3 + 32 + 32 - size * 8;
+               fields[1].out_value = NULL;
+               fields[1].in_value = NULL;
+       }
 
-       jtag_add_dr_scan(jtag_info->tap, 3, fields, TAP_DRPAUSE);
+       jtag_add_dr_scan(jtag_info->tap, 2, fields, TAP_DRPAUSE);
 
-       jtag_add_callback4(arm9endianness,
+       jtag_add_callback4(arm7_9_endianness_callback,
                (jtag_callback_data_t)in,
                (jtag_callback_data_t)size,
                (jtag_callback_data_t)be,
-               (jtag_callback_data_t)in);
+               (jtag_callback_data_t)0);
 
        jtag_add_runtest(0, TAP_DRPAUSE);