This causes widespread breakage due to the way the low-level code in
crt0.S and crt0_64.S operates, for ARM at least.
The fix is not complicated but it seems safer to revert this for now.
This reverts commit 2afddae07523f23f77acd066ad1719f53d289f98.
Signed-off-by: Simon Glass <sjg@chromium.org>
 static int reserve_global_data(void)
 {
        gd->start_addr_sp -= sizeof(gd_t);
-       gd->start_addr_sp &= ~0xf;
        gd->new_gd = (gd_t *)map_sysmem(gd->start_addr_sp, sizeof(gd_t));
        debug("Reserving %zu Bytes for Global Data at: %08lx\n",
                        sizeof(gd_t), gd->start_addr_sp);
 
        int pcidelay_done;
 #endif
        struct udevice *cur_serial_dev; /* current serial device */
-       /* arch-specific data */
-       struct arch_global_data arch __attribute__((aligned(16)));
+       struct arch_global_data arch;   /* architecture-specific data */
 } gd_t;
 #endif