X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fcodeopt.c;h=30ffd605428e25b36ad5fd080d8f01566cfd3ac4;hb=112ae0e3db511ddd92e769c11328646ebe2a6240;hp=5bbc46e6e93393c9454d2b79efa74f410dc6cd8f;hpb=7ce4196829a53194a546c752dfb4239013f0b16b;p=cc65 diff --git a/src/cc65/codeopt.c b/src/cc65/codeopt.c index 5bbc46e6e..30ffd6054 100644 --- a/src/cc65/codeopt.c +++ b/src/cc65/codeopt.c @@ -6,10 +6,10 @@ /* */ /* */ /* */ -/* (C) 2001-2002 Ullrich von Bassewitz */ -/* Wacholderweg 14 */ -/* D-70597 Stuttgart */ -/* EMail: uz@cc65.org */ +/* (C) 2001-2010, Ullrich von Bassewitz */ +/* Roemerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ /* */ /* */ /* This software is provided 'as-is', without any expressed or implied */ @@ -39,6 +39,7 @@ /* common */ #include "abend.h" #include "chartype.h" +#include "cpu.h" #include "print.h" #include "xmalloc.h" #include "xsprintf.h" @@ -52,17 +53,36 @@ #include "coptcmp.h" #include "coptind.h" #include "coptneg.h" +#include "coptptrload.h" #include "coptpush.h" +#include "coptsize.h" #include "coptstop.h" +#include "coptstore.h" #include "coptsub.h" #include "copttest.h" -#include "cpu.h" #include "error.h" #include "global.h" #include "codeopt.h" +/*****************************************************************************/ +/* Data */ +/*****************************************************************************/ + + + +/* Shift types */ +enum { + SHIFT_NONE, + SHIFT_ASR_1, + SHIFT_ASL_1, + SHIFT_LSR_1, + SHIFT_LSL_1 +}; + + + /*****************************************************************************/ /* Optimize shifts */ /*****************************************************************************/ @@ -71,15 +91,30 @@ static unsigned OptShift1 (CodeSeg* S) /* A call to the shlaxN routine may get replaced by one or more asl insns - * if the value of X is not used later. + * if the value of X is not used later. 
If X is used later, but it is zero + * on entry and it's a shift by one, it may get replaced by: + * + * asl a + * bcc L1 + * inx + * L1: + * */ { unsigned Changes = 0; + unsigned I; + + /* Generate register info */ + CS_GenRegInfo (S); /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { + CodeEntry* N; + CodeEntry* X; + CodeLabel* L; + /* Get next entry */ CodeEntry* E = CS_GetEntry (S, I); @@ -88,21 +123,46 @@ static unsigned OptShift1 (CodeSeg* S) (strncmp (E->Arg, "shlax", 5) == 0 || strncmp (E->Arg, "aslax", 5) == 0) && strlen (E->Arg) == 6 && - IsDigit (E->Arg[5]) && - !RegXUsed (S, I+1)) { + IsDigit (E->Arg[5])) { - /* Insert shift insns */ - unsigned Count = E->Arg[5] - '0'; - while (Count--) { - CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); - } + if (!RegXUsed (S, I+1)) { - /* Delete the call to shlax */ - CS_DelEntry (S, I); + /* Insert shift insns */ + unsigned Count = E->Arg[5] - '0'; + while (Count--) { + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); + } - /* Remember, we had changes */ - ++Changes; + /* Delete the call to shlax */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + + } else if (E->RI->In.RegX == 0 && + E->Arg[5] == '1' && + (N = CS_GetNextEntry (S, I)) != 0) { + + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); + + /* bcc L1 */ + L = CS_GenLabel (S, N); + X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, E->LI); + CS_InsertEntry (S, X, I+2); + + /* inx */ + X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+3); + + /* Delete the call to shlax */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + } } @@ -111,6 +171,9 @@ static unsigned OptShift1 (CodeSeg* S) } + /* Free the register info */ + CS_FreeRegInfo (S); + /* Return the number of changes made */ return Changes; } @@ -119,13 +182,17 @@ 
static unsigned OptShift1 (CodeSeg* S) static unsigned OptShift2 (CodeSeg* S) /* A call to the shraxN routine may get replaced by one or more lsr insns - * if the value of X is not used later. + * if the value of X is zero. */ { unsigned Changes = 0; + unsigned I; + + /* Generate register info */ + CS_GenRegInfo (S); /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { /* Get next entry */ @@ -136,16 +203,16 @@ static unsigned OptShift2 (CodeSeg* S) strncmp (E->Arg, "shrax", 5) == 0 && strlen (E->Arg) == 6 && IsDigit (E->Arg[5]) && - !RegXUsed (S, I+1)) { + E->RI->In.RegX == 0) { /* Insert shift insns */ unsigned Count = E->Arg[5] - '0'; while (Count--) { CodeEntry* X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); + CS_InsertEntry (S, X, I+1); } - /* Delete the call to shlax */ + /* Delete the call to shrax */ CS_DelEntry (S, I); /* Remember, we had changes */ @@ -158,76 +225,55 @@ static unsigned OptShift2 (CodeSeg* S) } + /* Free the register info */ + CS_FreeRegInfo (S); + /* Return the number of changes made */ return Changes; } -/*****************************************************************************/ -/* Optimize stores through pointers */ -/*****************************************************************************/ - - - -static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L) -/* Check if this is one of the allowed suboperation for OptPtrStore1 */ +static unsigned GetShiftType (const char* Sub) +/* Helper function for OptShift3 */ { - /* Check for a label attached to the entry */ - if (CE_HasLabel (L[0])) { - return 0; - } - - /* Check for single insn sub ops */ - if (L[0]->OPC == OP65_AND || - L[0]->OPC == OP65_EOR || - L[0]->OPC == OP65_ORA || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) { - - /* One insn */ - return 1; - - } else if (L[0]->OPC == OP65_CLC && - 
(L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_ADC && - !CE_HasLabel (L[1])) { - return 2; - } else if (L[0]->OPC == OP65_SEC && - (L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_SBC && - !CE_HasLabel (L[1])) { - return 2; + if (*Sub == 'a') { + if (strcmp (Sub+1, "slax1") == 0) { + return SHIFT_ASL_1; + } else if (strcmp (Sub+1, "srax1") == 0) { + return SHIFT_ASR_1; + } + } else if (*Sub == 's') { + if (strcmp (Sub+1, "hlax1") == 0) { + return SHIFT_LSL_1; + } else if (strcmp (Sub+1, "hrax1") == 0) { + return SHIFT_LSR_1; + } } - - - - /* Not found */ - return 0; + return SHIFT_NONE; } -static unsigned OptPtrStore1 (CodeSeg* S) -/* Search for the sequence: +static unsigned OptShift3 (CodeSeg* S) +/* Search for the sequence * - * jsr pushax - * ldy xxx - * jsr ldauidx - * subop - * ldy yyy - * jsr staspidx + * lda xxx + * ldx yyy + * jsr aslax1/asrax1/shlax1/shrax1 + * sta aaa + * stx bbb * - * and replace it by: + * and replace it by * - * sta ptr1 - * stx ptr1+1 - * ldy xxx - * ldx #$00 - * lda (ptr1),y - * subop - * ldy yyy - * sta (ptr1),y + * lda xxx + * asl a + * sta aaa + * lda yyy + * rol a + * sta bbb + * + * or similar, provided that a/x is not used later */ { unsigned Changes = 0; @@ -236,58 +282,86 @@ static unsigned OptPtrStore1 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - unsigned K; - CodeEntry* L[10]; + unsigned ShiftType; + CodeEntry* L[5]; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (CE_IsCall (L[0], "pushax") && - CS_GetEntries (S, L+1, I+1, 3) && - L[1]->OPC == OP65_LDY && - CE_KnownImm (L[1]) && - !CE_HasLabel (L[1]) && - CE_IsCall (L[2], "ldauidx") && - !CE_HasLabel (L[2]) && - (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 && - CS_GetEntries (S, L+3+K, I+3+K, 2) && - L[3+K]->OPC == OP65_LDY && - CE_KnownImm (L[3+K]) && - !CE_HasLabel (L[3+K]) && - CE_IsCall (L[4+K], "staspidx") && - !CE_HasLabel (L[4+K])) { - - CodeEntry* X; - - /* Create and insert the stores */ - 
X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - - /* Delete the call to pushax */ - CS_DelEntry (S, I); - - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+3); - - /* Insert the load from ptr1 */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[2]->LI); - CS_InsertEntry (S, X, I+4); - - /* Insert the store through ptr1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+6+K); - - /* Delete the call to staspidx */ - CS_DelEntry (S, I+7+K); + if (L[0]->OPC == OP65_LDA && + (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) && + CS_GetEntries (S, L+1, I+1, 4) && + !CS_RangeHasLabel (S, I+1, 4) && + L[1]->OPC == OP65_LDX && + (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) && + L[2]->OPC == OP65_JSR && + (ShiftType = GetShiftType (L[2]->Arg)) != SHIFT_NONE&& + L[3]->OPC == OP65_STA && + (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && + L[4]->OPC == OP65_STX && + (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) && + !RegAXUsed (S, I+5)) { + + CodeEntry* X; + + /* Handle the four shift types differently */ + switch (ShiftType) { + + case SHIFT_ASR_1: + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+5); + X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI); + CS_InsertEntry (S, X, I+6); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+7); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+8); + X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+9); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+10); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+11); + 
CS_DelEntries (S, I, 5); + break; + + case SHIFT_LSR_1: + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+5); + X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+6); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+7); + X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+8); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+9); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+10); + CS_DelEntries (S, I, 5); + break; + + case SHIFT_LSL_1: + case SHIFT_ASL_1: + /* These two are identical */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+1); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+2); + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+3); + X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+4); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+5); + CS_DelEntries (S, I+6, 4); + break; + + } /* Remember, we had changes */ - ++Changes; + ++Changes; } @@ -302,22 +376,8 @@ static unsigned OptPtrStore1 (CodeSeg* S) -static unsigned OptPtrStore2 (CodeSeg* S) -/* Search for the sequence: - * - * jsr pushax - * lda xxx - * ldy yyy - * jsr staspidx - * - * and replace it by: - * - * sta ptr1 - * stx ptr1+1 - * lda xxx - * ldy yyy - * sta (ptr1),y - */ +static unsigned OptShift4 (CodeSeg* S) +/* Inline the shift subroutines. 
*/ { unsigned Changes = 0; @@ -325,45 +385,73 @@ static unsigned OptPtrStore2 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[4]; + CodeEntry* X; + unsigned IP; /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (CE_IsCall (L[0], "pushax") && - CS_GetEntries (S, L+1, I+1, 3) && - L[1]->OPC == OP65_LDA && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDY && - !CE_HasLabel (L[2]) && - CE_IsCall (L[3], "staspidx") && - !CE_HasLabel (L[3])) { - - CodeEntry* X; - - /* Create and insert the stores */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - - /* Delete the call to pushax */ - CS_DelEntry (S, I); - - /* Insert the store through ptr1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+4); - - /* Delete the call to staspidx */ - CS_DelEntry (S, I+5); + CodeEntry* E = CS_GetEntry (S, I); + + /* Check for a call to one of the shift routine */ + if (E->OPC == OP65_JSR && + (strncmp (E->Arg, "shlax", 5) == 0 || + strncmp (E->Arg, "aslax", 5) == 0) && + strlen (E->Arg) == 6 && + IsDigit (E->Arg[5])) { + + /* Get number of shifts */ + unsigned ShiftCount = (E->Arg[5] - '0'); + + /* Code is: + * + * stx tmp1 + * asl a + * rol tmp1 + * (repeat ShiftCount-1 times) + * ldx tmp1 + * + * which makes 4 + 3 * ShiftCount bytes, compared to the original + * 3 bytes for the subroutine call. However, in most cases, the + * final load of the X register gets merged with some other insn + * and replaces a txa, so for a shift count of 1, we get a factor + * of 200, which matches nicely the CodeSizeFactor enabled with -Oi + */ + if (ShiftCount > 1 || S->CodeSizeFactor > 200) { + unsigned Size = 4 + 3 * ShiftCount; + if ((Size * 100 / 3) > S->CodeSizeFactor) { + /* Not acceptable */ + goto NextEntry; + } + } + + /* Inline the code. 
Insertion point is behind the subroutine call */ + IP = (I + 1); + + /* stx tmp1 */ + X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + while (ShiftCount--) { + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + /* rol tmp1 */ + X = NewCodeEntry (OP65_ROL, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + } + + /* ldx tmp1 */ + X = NewCodeEntry (OP65_LDX, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + /* Remove the subroutine call */ + CS_DelEntry (S, I); /* Remember, we had changes */ - ++Changes; - + ++Changes; } +NextEntry: /* Next entry */ ++I; @@ -376,76 +464,52 @@ static unsigned OptPtrStore2 (CodeSeg* S) /*****************************************************************************/ -/* Optimize loads through pointers */ +/* Optimize loads */ /*****************************************************************************/ -static unsigned OptPtrLoad1 (CodeSeg* S) -/* Search for the sequence: - * - * tax - * dey - * lda (sp),y # May be any destination - * ldy ... - * jsr ldauidx - * - * and replace it by: - * - * sta ptr1+1 - * dey - * lda (sp),y - * sta ptr1 - * ldy ... - * ldx #$00 - * lda (ptr1),y +static unsigned OptLoad1 (CodeSeg* S) +/* Search for a call to ldaxysp where X is not used later and replace it by + * a load of just the A register. 
*/ { + unsigned I; unsigned Changes = 0; + /* Generate register info */ + CS_GenRegInfo (S); + /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[5]; + CodeEntry* E; /* Get next entry */ - L[0] = CS_GetEntry (S, I); + E = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_TAX && - CS_GetEntries (S, L+1, I+1, 4) && - L[1]->OPC == OP65_DEY && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_LDY && - !CE_HasLabel (L[3]) && - CE_IsCall (L[4], "ldauidx") && - !CE_HasLabel (L[4])) { + if (CE_IsCallTo (E, "ldaxysp") && + RegValIsKnown (E->RI->In.RegY) && + !RegXUsed (S, I+1)) { - CodeEntry* X; + CodeEntry* X; - /* Store the high byte and remove the TAX instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I); + /* Reload the Y register */ + const char* Arg = MakeHexArg (E->RI->In.RegY - 1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + CS_InsertEntry (S, X, I+1); - /* Store the low byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[2]->LI); - CS_InsertEntry (S, X, I+3); - - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+5); + /* Load from stack */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+2); - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+5); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+6); + /* Now remove the call to the subroutine */ + CS_DelEntry (S, I); /* Remember, we had changes */ - ++Changes; + ++Changes; } @@ -454,114 +518,61 @@ static unsigned OptPtrLoad1 (CodeSeg* S) } + /* Free the register info */ + CS_FreeRegInfo (S); + /* Return the number of changes made */ return Changes; } -static unsigned OptPtrLoad2 (CodeSeg* S) -/* Search for the sequence: - * - * clc - * adc xxx - * tay - * 
txa - * adc yyy - * tax - * tya - * ldy - * jsr ldauidx - * - * and replace it by: - * - * clc - * adc xxx - * sta ptr1 - * txa - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 - * lda (ptr1),y - */ +static unsigned OptLoad2 (CodeSeg* S) +/* Replace calls to ldaxysp by inline code */ { + unsigned I; unsigned Changes = 0; /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[9]; + CodeEntry* E; /* Get next entry */ - L[0] = CS_GetEntry (S, I); + E = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_CLC && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_ADC && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TAY && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_TXA && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_ADC && - !CE_HasLabel (L[4]) && - L[5]->OPC == OP65_TAX && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_TYA && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_LDY && - !CE_HasLabel (L[7]) && - CE_IsCall (L[8], "ldauidx") && - !CE_HasLabel (L[8])) { + if (CE_IsCallTo (E, "ldaxysp")) { - CodeEntry* X; - CodeEntry* P; - - /* Store the low byte and remove the TAY instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[1]->LI); - CS_InsertEntry (S, X, I+2); - CS_DelEntry (S, I+3); + CodeEntry* X; - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI); - CS_InsertEntry (S, X, I+5); - - /* If the instruction before the adc is a ldx, replace the - * txa by and lda with the same location of the ldx. 
- */ - if ((P = CS_GetPrevEntry (S, I)) != 0 && - P->OPC == OP65_LDX && - !CE_HasLabel (P)) { + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+1); - X = NewCodeEntry (OP65_LDA, P->AM, P->Arg, 0, P->LI); - CS_InsertEntry (S, X, I+4); - CS_DelEntry (S, I+3); - } + /* tax */ + X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+2); - /* Delete more transfer insns */ - CS_DelEntry (S, I+7); - CS_DelEntry (S, I+6); + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+3); - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+7); + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+4); - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[7]->LI); - CS_InsertEntry (S, X, I+7); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[7]->LI); - CS_InsertEntry (S, X, I+8); + /* Now remove the call to the subroutine */ + CS_DelEntry (S, I); /* Remember, we had changes */ - ++Changes; + ++Changes; } /* Next entry */ ++I; - } /* Return the number of changes made */ @@ -570,30 +581,84 @@ static unsigned OptPtrLoad2 (CodeSeg* S) -static unsigned OptPtrLoad3 (CodeSeg* S) +/*****************************************************************************/ +/* Optimize stores through pointers */ +/*****************************************************************************/ + + + +static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L) +/* Check if this is one of the allowed suboperation for OptPtrStore1 */ +{ + /* Check for a label attached to the entry */ + if (CE_HasLabel (L[0])) { + return 0; + } + + /* Check for single insn sub ops */ + if (L[0]->OPC == OP65_AND || + L[0]->OPC == OP65_EOR || + L[0]->OPC == OP65_ORA || + (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) || + (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) { + + 
/* One insn */ + return 1; + + } else if (L[0]->OPC == OP65_CLC && + (L[1] = CS_GetNextEntry (S, I)) != 0 && + L[1]->OPC == OP65_ADC && + !CE_HasLabel (L[1])) { + return 2; + } else if (L[0]->OPC == OP65_SEC && + (L[1] = CS_GetNextEntry (S, I)) != 0 && + L[1]->OPC == OP65_SBC && + !CE_HasLabel (L[1])) { + return 2; + } + + + + /* Not found */ + return 0; +} + + + +static unsigned OptPtrStore1 (CodeSeg* S) /* Search for the sequence: * - * adc xxx - * pha - * txa - * iny - * adc yyy - * tax - * pla - * ldy - * jsr ldauidx + * jsr pushax + * ldy xxx + * jsr ldauidx + * subop + * ldy yyy + * jsr staspidx * * and replace it by: * - * adc xxx * sta ptr1 - * txa - * iny - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 + * stx ptr1+1 + * ldy xxx + * ldx #$00 * lda (ptr1),y + * subop + * ldy yyy + * sta (ptr1),y + * + * In case a/x is loaded from the register bank before the pushax, we can even + * use the register bank instead of ptr1. + */ +/* + * jsr pushax + * ldy xxx + * jsr ldauidx + * ldx #$00 + * lda (zp),y + * subop + * ldy yyy + * sta (zp),y + * jsr staspidx */ { unsigned Changes = 0; @@ -602,62 +667,97 @@ static unsigned OptPtrLoad3 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[9]; + unsigned K; + CodeEntry* L[10]; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_ADC && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_PHA && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TXA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_INY && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_ADC && - !CE_HasLabel (L[4]) && - L[5]->OPC == OP65_TAX && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_PLA && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_LDY && - !CE_HasLabel (L[7]) && - CE_IsCall (L[8], "ldauidx") && - !CE_HasLabel (L[8])) { - - CodeEntry* X; + if (CE_IsCallTo (L[0], "pushax") && + CS_GetEntries (S, L+1, I+1, 3) && + L[1]->OPC == OP65_LDY && + CE_IsConstImm (L[1]) && + !CE_HasLabel 
(L[1]) && + CE_IsCallTo (L[2], "ldauidx") && + !CE_HasLabel (L[2]) && + (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 && + CS_GetEntries (S, L+3+K, I+3+K, 2) && + L[3+K]->OPC == OP65_LDY && + CE_IsConstImm (L[3+K]) && + !CE_HasLabel (L[3+K]) && + CE_IsCallTo (L[4+K], "staspidx") && + !CE_HasLabel (L[4+K])) { - /* Store the low byte and remove the PHA instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I+2); - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI); - CS_InsertEntry (S, X, I+5); + const char* RegBank = 0; + const char* ZPLoc = "ptr1"; + CodeEntry* X; - /* Delete more transfer and PLA insns */ - CS_DelEntry (S, I+7); - CS_DelEntry (S, I+6); - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+7); + /* Get the preceeding two instructions and check them. We check + * for: + * lda regbank+n + * ldx regbank+n+1 + */ + if (I > 1) { + CodeEntry* P[2]; + P[0] = CS_GetEntry (S, I-2); + P[1] = CS_GetEntry (S, I-1); + if (P[0]->OPC == OP65_LDA && + P[0]->AM == AM65_ZP && + P[1]->OPC == OP65_LDX && + P[1]->AM == AM65_ZP && + !CE_HasLabel (P[1]) && + strncmp (P[0]->Arg, "regbank+", 8) == 0) { + + unsigned Len = strlen (P[0]->Arg); + + if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 && + P[1]->Arg[Len+0] == '+' && + P[1]->Arg[Len+1] == '1' && + P[1]->Arg[Len+2] == '\0') { + + /* Ok, found. 
Use the name of the register bank */ + RegBank = ZPLoc = P[0]->Arg; + } + } + } - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI); - CS_InsertEntry (S, X, I+7); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI); - CS_InsertEntry (S, X, I+8); + /* Insert the load via the zp pointer */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); + CS_InsertEntry (S, X, I+3); + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI); + CS_InsertEntry (S, X, I+4); + + /* Insert the store through the zp pointer */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI); + CS_InsertEntry (S, X, I+6+K); + + /* Delete the old code */ + CS_DelEntry (S, I+7+K); /* jsr spaspidx */ + CS_DelEntry (S, I+2); /* jsr ldauidx */ + + /* Create and insert the stores into the zp pointer if needed */ + if (RegBank == 0) { + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+1); + X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+2); + } + + /* Delete more old code. Do it here to keep a label attached to + * entry I in place. 
+ */ + CS_DelEntry (S, I); /* jsr pushax */ - /* Remember, we had changes */ - ++Changes; + /* Remember, we had changes */ + ++Changes; - } + } - /* Next entry */ - ++I; + /* Next entry */ + ++I; } @@ -667,7 +767,7 @@ static unsigned OptPtrLoad3 (CodeSeg* S) -static unsigned OptPtrLoad4 (CodeSeg* S) +static unsigned OptPtrStore2 (CodeSeg* S) /* Search for the sequence: * * lda #<(label+0) @@ -676,14 +776,18 @@ static unsigned OptPtrLoad4 (CodeSeg* S) * adc xxx * bcc L * inx - * L: ldy #$00 - * jsr ldauidx + * L: jsr pushax + * ldx #$00 + * lda yyy + * ldy #$00 + * jsr staspidx * * and replace it by: * * ldy xxx - * ldx #$00 - * lda label,y + * ldx #$00 + * lda yyy + * sta label,y */ { unsigned Changes = 0; @@ -692,7 +796,7 @@ static unsigned OptPtrLoad4 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[8]; + CodeEntry* L[11]; unsigned Len; /* Get next entry */ @@ -701,26 +805,24 @@ static unsigned OptPtrLoad4 (CodeSeg* S) /* Check for the sequence */ if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_IMM && - CS_GetEntries (S, L+1, I+1, 7) && + CS_GetEntries (S, L+1, I+1, 10) && L[1]->OPC == OP65_LDX && L[1]->AM == AM65_IMM && - !CE_HasLabel (L[1]) && L[2]->OPC == OP65_CLC && - !CE_HasLabel (L[2]) && L[3]->OPC == OP65_ADC && (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && - !CE_HasLabel (L[3]) && (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) && L[4]->JumpTo != 0 && L[4]->JumpTo->Owner == L[6] && - !CE_HasLabel (L[4]) && L[5]->OPC == OP65_INX && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_LDY && - CE_KnownImm (L[6]) && - L[6]->Num == 0 && - CE_IsCall (L[7], "ldauidx") && - !CE_HasLabel (L[7]) && + CE_IsCallTo (L[6], "pushax") && + L[7]->OPC == OP65_LDX && + L[8]->OPC == OP65_LDA && + L[9]->OPC == OP65_LDY && + CE_IsKnownImm (L[9], 0) && + CE_IsCallTo (L[10], "staspidx") && + !CS_RangeHasLabel (S, I+1, 5) && + !CS_RangeHasLabel (S, I+7, 4) && /* Check the label last because this is quite costly */ (Len = strlen (L[0]->Arg)) > 3 && 
L[0]->Arg[0] == '<' && @@ -736,19 +838,22 @@ static unsigned OptPtrLoad4 (CodeSeg* S) * we keep the line references. */ X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+8); + CS_InsertEntry (S, X, I+11); + + X = NewCodeEntry (OP65_LDX, L[7]->AM, L[7]->Arg, 0, L[7]->LI); + CS_InsertEntry (S, X, I+12); - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+9); + X = NewCodeEntry (OP65_LDA, L[8]->AM, L[8]->Arg, 0, L[8]->LI); + CS_InsertEntry (S, X, I+13); Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); Label[Len-3] = '\0'; - X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); - CS_InsertEntry (S, X, I+10); + X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[10]->LI); + CS_InsertEntry (S, X, I+14); xfree (Label); /* Remove the old code */ - CS_DelEntries (S, I, 8); + CS_DelEntries (S, I, 11); /* Remember, we had changes */ ++Changes; @@ -766,26 +871,30 @@ static unsigned OptPtrLoad4 (CodeSeg* S) -static unsigned OptPtrLoad5 (CodeSeg* S) +static unsigned OptPtrStore3 (CodeSeg* S) /* Search for the sequence: * * lda #<(label+0) * ldx #>(label+0) - * ldy #$xx + * ldy aaa * clc * adc (sp),y * bcc L * inx - * L: ldy #$00 - * jsr ldauidx + * L: jsr pushax + * ldx #$00 + * lda yyy + * ldy #$00 + * jsr staspidx * * and replace it by: * - * ldy #$xx - * lda (sp),y + * ldy aaa + * ldx #$00 + * lda (sp),y * tay - * ldx #$00 - * lda label,y + * lda yyy + * sta label,y */ { unsigned Changes = 0; @@ -794,7 +903,7 @@ static unsigned OptPtrLoad5 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[9]; + CodeEntry* L[12]; unsigned Len; /* Get next entry */ @@ -803,29 +912,26 @@ static unsigned OptPtrLoad5 (CodeSeg* S) /* Check for the sequence */ if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_IMM && - CS_GetEntries (S, L+1, I+1, 8) && + CS_GetEntries (S, L+1, I+1, 11) && L[1]->OPC == OP65_LDX && L[1]->AM == AM65_IMM && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDY && - 
CE_KnownImm (L[2]) && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_CLC && - !CE_HasLabel (L[3]) && + L[2]->OPC == OP65_LDY && + L[3]->OPC == OP65_CLC && L[4]->OPC == OP65_ADC && - L[4]->AM == AM65_ZP_INDY && - !CE_HasLabel (L[4]) && + L[4]->AM == AM65_ZP_INDY && + strcmp (L[4]->Arg, "sp") == 0 && (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) && L[5]->JumpTo != 0 && L[5]->JumpTo->Owner == L[7] && - !CE_HasLabel (L[5]) && L[6]->OPC == OP65_INX && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_LDY && - CE_KnownImm (L[7]) && - L[7]->Num == 0 && - CE_IsCall (L[8], "ldauidx") && - !CE_HasLabel (L[8]) && + CE_IsCallTo (L[7], "pushax") && + L[8]->OPC == OP65_LDX && + L[9]->OPC == OP65_LDA && + L[10]->OPC == OP65_LDY && + CE_IsKnownImm (L[10], 0) && + CE_IsCallTo (L[11], "staspidx") && + !CS_RangeHasLabel (S, I+1, 6) && + !CS_RangeHasLabel (S, I+8, 4) && /* Check the label last because this is quite costly */ (Len = strlen (L[0]->Arg)) > 3 && L[0]->Arg[0] == '<' && @@ -837,97 +943,32 @@ static unsigned OptPtrLoad5 (CodeSeg* S) CodeEntry* X; char* Label; - /* Add the lda */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[4]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+3); + /* We will create all the new stuff behind the current one so + * we keep the line references. 
+ */ + X = NewCodeEntry (OP65_LDY, L[2]->AM, L[2]->Arg, 0, L[2]->LI); + CS_InsertEntry (S, X, I+12); + + X = NewCodeEntry (OP65_LDX, L[8]->AM, L[8]->Arg, 0, L[8]->LI); + CS_InsertEntry (S, X, I+13); - /* Add the tay */ - X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[0]->LI); - CS_InsertEntry (S, X, I+4); + X = NewCodeEntry (OP65_LDA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+14); - /* Add the ldx */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+5); + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[4]->LI); + CS_InsertEntry (S, X, I+15); + + X = NewCodeEntry (OP65_LDA, L[9]->AM, L[9]->Arg, 0, L[9]->LI); + CS_InsertEntry (S, X, I+16); - /* Add the lda */ Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); Label[Len-3] = '\0'; - X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); - CS_InsertEntry (S, X, I+6); - xfree (Label); + X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[11]->LI); + CS_InsertEntry (S, X, I+17); + xfree (Label); /* Remove the old code */ - CS_DelEntries (S, I, 2); - CS_DelEntries (S, I+5, 6); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptPtrLoad6 (CodeSeg* S) -/* Search for the sequence - * - * ldy ... - * jsr ldauidx - * - * and replace it by: - * - * ldy ... - * stx ptr1+1 - * sta ptr1 - * ldx #$00 - * lda (ptr1),y - * - * This step must be execute *after* OptPtrLoad1! 
- */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDY && - CS_GetEntries (S, L+1, I+1, 1) && - CE_IsCall (L[1], "ldauidx") && - !CE_HasLabel (L[1])) { - - CodeEntry* X; - - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - - /* Store the low byte */ - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+3); - - /* Load the high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+4); + CS_DelEntries (S, I, 12); /* Remember, we had changes */ ++Changes; @@ -991,42 +1032,66 @@ static unsigned OptDecouple (CodeSeg* S) /* Check the instruction */ switch (E->OPC) { + case OP65_DEA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA - 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + case OP65_DEX: - if (E->RI->In.RegX >= 0) { - Arg = MakeHexArg ((E->RI->In.RegX - 1) & 0xFF); + if (RegValIsKnown (In->RegX)) { + Arg = MakeHexArg ((In->RegX - 1) & 0xFF); X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); } break; case OP65_DEY: - if (E->RI->In.RegY >= 0) { - Arg = MakeHexArg ((E->RI->In.RegY - 1) & 0xFF); + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY - 1) & 0xFF); X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); } break; + case OP65_INA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA + 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + case OP65_INX: - if (E->RI->In.RegX >= 0) { - Arg = MakeHexArg ((E->RI->In.RegX + 1) & 0xFF); + if (RegValIsKnown 
(In->RegX)) { + Arg = MakeHexArg ((In->RegX + 1) & 0xFF); X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); } break; case OP65_INY: - if (E->RI->In.RegY >= 0) { - Arg = MakeHexArg ((E->RI->In.RegY + 1) & 0xFF); + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY + 1) & 0xFF); X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); } break; case OP65_LDA: if (E->AM == AM65_ZP) { - switch (GetKnownReg (E->Use, In)) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { case REG_TMP1: Arg = MakeHexArg (In->Tmp1); X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); break; + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + case REG_SREG_LO: Arg = MakeHexArg (In->SRegLo); X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); @@ -1042,12 +1107,22 @@ static unsigned OptDecouple (CodeSeg* S) case OP65_LDX: if (E->AM == AM65_ZP) { - switch (GetKnownReg (E->Use, In)) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { case REG_TMP1: Arg = MakeHexArg (In->Tmp1); X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); break; + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + case REG_SREG_LO: Arg = MakeHexArg (In->SRegLo); X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); @@ -1069,6 +1144,16 @@ static unsigned OptDecouple (CodeSeg* S) X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); break; + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + case REG_SREG_LO: Arg = MakeHexArg (In->SRegLo); X = NewCodeEntry 
(OP65_LDY, AM65_IMM, Arg, 0, E->LI); @@ -1105,7 +1190,7 @@ static unsigned OptDecouple (CodeSeg* S) case OP65_TYA: if (E->RI->In.RegY >= 0) { - Arg = MakeHexArg (In->RegY); + Arg = MakeHexArg (In->RegY); X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); } break; @@ -1118,7 +1203,7 @@ static unsigned OptDecouple (CodeSeg* S) /* Insert the replacement if we have one */ if (X) { - CS_InsertEntry (S, X, I+1); + CS_InsertEntry (S, X, I+1); CS_DelEntry (S, I); ++Changes; } @@ -1138,134 +1223,34 @@ static unsigned OptDecouple (CodeSeg* S) /*****************************************************************************/ -/* Size optimization */ +/* Optimize stack pointer ops */ /*****************************************************************************/ -#if 0 -static unsigned OptSize1 (CodeSeg* S) -/* Do size optimization by calling special subroutines that preload registers. - * This routine does not work standalone, it needs a following register load - * removal pass. +static unsigned IsDecSP (const CodeEntry* E) +/* Check if this is an insn that decrements the stack pointer. If so, return + * the decrement. If not, return zero. + * The function expects E to be a subroutine call. 
*/ { - static const char* Func = { - "stax0sp", /* staxysp, y = 0 */ - "addeq0sp", - "ldax0sp", /* ldaxysp, y = 1 */ - "ldeax0sp", /* ldeaxysp, y = 3 */ - "push0", /* pushax, a = 0, x = 0 */ - "pusha0", /* pushax, x = 0 */ - "pushaFF", /* pushax, x = ff */ - "pusha0sp", /* pushaysp, y = 0 */ - "tosadda0", /* tosaddax, x = 0 */ - "tosanda0", /* tosandax, x = 0 */ - "tosdiva0", /* tosdivax, x = 0 */ - "toseqa0", /* toseqax, x = 0 */ - "tosgea0", /* tosgeax, x = 0 */ - "tosgta0", /* tosgtax, x = 0 */ - "tosadd0ax", /* tosaddeax, sreg = 0 */ - "laddeqa", /* laddeq, sreg = 0, x = 0 */ - "laddeq1", /* laddeq, sreg = 0, x = 0, a = 1 */ - "laddeq0sp", /* laddeqysp, y = 0 */ - "tosand0ax", /* tosandeax, sreg = 0 */ - "ldaxi", /* ldaxidx, y = 1 */ - "ldeaxi", /* ldeaxidx, y = 3 */ - "ldeax0sp", /* ldeaxysp, y = 3 */ - "tosdiv0ax", /* tosdiveax, sreg = 0 */ - "toslea0", /* tosleax, x = 0 */ - "tosmod0ax", /* tosmodeax, sreg = 0 */ - "tosmul0ax", /* tosmuleax, sreg = 0 */ - "tosumul0ax", /* tosumuleax, sreg = 0 */ - "tosor0ax", /* tosoreax, sreg = 0 */ - "push0ax", /* pusheax, sreg = 0 */ - "tosrsub0ax", /* tosrsubeax, sreg = 0 */ - "tosshl0ax", /* tosshleax, sreg = 0 */ - "tosasl0ax", /* tosasleax, sreg = 0 */ - "tosshr0ax", /* tosshreax, sreg = 0 */ - "tosasr0ax", /* tosasreax, sreg = 0 */ - "tossub0ax", /* tossubeax, sreg = 0 */ - "lsubeqa", /* lsubeq, sreg = 0, x = 0 */ - "lsubeq1", /* lsubeq, sreg = 0, x = 0, a = 1 */ - "lsubeq0sp", /* lsubeqysp, y = 0 */ - "toslta0", /* tosltax, x = 0 */ - "tosudiv0ax", /* tosudiveax, sreg = 0 */ - "tosumod0ax", /* tosumodeax, sreg = 0 */ - "tosxor0ax", /* tosxoreax, sreg = 0 */ - "tosmoda0", /* tosmodax, x = 0 */ - "tosmula0", /* tosmulax, x = 0 */ - "tosumula0", /* tosumulax, x = 0 */ - "tosnea0", /* tosneax, x = 0 */ - "tosora0", /* tosorax, x = 0 */ - "push1", /* pushax, x = 0, a = 1 */ - "push2", /* pushax, x = 0, a = 2 */ - "push3", /* pushax, x = 0, a = 3 */ - "push4", /* pushax, x = 0, a = 4 */ - "push5", /* pushax, x = 0, a = 5 
*/ - "push6", /* pushax, x = 0, a = 6 */ - "push7", /* pushax, x = 0, a = 7 */ - "pushc0", /* pusha, a = 0 */ - "pushc1", /* pusha, a = 1 */ - "pushc2", /* pusha, a = 2 */ - "tosrsuba0", /* tosrsubax, x = 0 */ - "tosshla0", /* tosshlax, x = 0 */ - "tosasla0", /* tosaslax, x = 0 */ - "tosshra0", /* tosshrax, x = 0 */ - "tosasra0", /* tosasrax, x = 0 */ - "steax0sp", /* steaxsp, y = 0 */ - "tossuba0", /* tossubax, x = 0 */ - "subeq0sp", /* subeqysp, y = 0 */ - "tosudiva0", /* tosudivax, x = 0 */ - "tosugea0", /* tosugeax, x = 0 */ - "tosugta0", /* tosugtax, x = 0 */ - "tosulea0", /* tosuleax, x = 0 */ - "tosulta0", /* tosultax, x = 0 */ - "tosumoda0", /* tosumodax, x = 0 */ - "tosxora0", /* tosxorax, x = 0 */ - }; - - unsigned Changes = 0; - unsigned I; - - /* Generate register info for the following step */ - CS_GenRegInfo (S); - - /* Walk over the entries */ - I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check if it's a subroutine call */ - if (E->OPC == OP65_JSR) { - - /* Check for any of the known functions */ - - - - } - - /* Next entry */ - ++I; - + if (strncmp (E->Arg, "decsp", 5) == 0) { + if (E->Arg[5] >= '1' && E->Arg[5] <= '8') { + return (E->Arg[5] - '0'); + } + } else if (strcmp (E->Arg, "subysp") == 0 && RegValIsKnown (E->RI->In.RegY)) { + return E->RI->In.RegY; } - /* Free register info */ - CS_FreeRegInfo (S); - - /* Return the number of changes made */ - return Changes; + /* If we come here, it's not a decsp op */ + return 0; } -#endif -static unsigned OptSize2 (CodeSeg* S) -/* Do size optimization by using shorter code sequences, even if this - * introduces relations between instructions. This step must be one of the - * last steps, because it makes further work much more difficult. +static unsigned OptStackPtrOps (CodeSeg* S) +/* Merge adjacent calls to decsp into one. NOTE: This function won't merge all + * known cases! 
*/ { unsigned Changes = 0; @@ -1278,74 +1263,53 @@ static unsigned OptSize2 (CodeSeg* S) I = 0; while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Assume we have no replacement */ - CodeEntry* X = 0; - - /* Check the instruction */ - switch (E->OPC) { - - case OP65_LDA: - if (CE_KnownImm (E)) { - short Val = (short) E->Num; - if (Val == E->RI->In.RegX) { - X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, E->LI); - } else if (Val == E->RI->In.RegY) { - X = NewCodeEntry (OP65_TYA, AM65_IMP, 0, 0, E->LI); - } else if (E->RI->In.RegA >= 0 && CPU >= CPU_65C02) { - if (Val == ((E->RI->In.RegA - 1) & 0xFF)) { - X = NewCodeEntry (OP65_DEA, AM65_IMP, 0, 0, E->LI); - } else if (Val == ((E->RI->In.RegA + 1) & 0xFF)) { - X = NewCodeEntry (OP65_INA, AM65_IMP, 0, 0, E->LI); - } - } - } - break; - - case OP65_LDX: - if (CE_KnownImm (E)) { - short Val = (short) E->Num; - if (E->RI->In.RegX >= 0 && Val == ((E->RI->In.RegX - 1) & 0xFF)) { - X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, E->LI); - } else if (E->RI->In.RegX >= 0 && Val == ((E->RI->In.RegX + 1) & 0xFF)) { - X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI); - } else if (Val == E->RI->In.RegA) { - X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, E->LI); - } - } - break; - - case OP65_LDY: - if (CE_KnownImm (E)) { - short Val = (short) E->Num; - if (E->RI->In.RegY >= 0 && Val == ((E->RI->In.RegY - 1) & 0xFF)) { - X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI); - } else if (E->RI->In.RegY >= 0 && Val == ((E->RI->In.RegY + 1) & 0xFF)) { - X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, E->LI); - } else if (Val == E->RI->In.RegA) { - X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, E->LI); - } - } - break; - - default: - /* Avoid gcc warnings */ - break; - - } - - /* Insert the replacement if we have one */ - if (X) { - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I); + unsigned Dec1; + unsigned Dec2; + const CodeEntry* N; + + /* Get the next entry */ + const CodeEntry* E = CS_GetEntry 
(S, I); + + /* Check for decspn or subysp */ + if (E->OPC == OP65_JSR && + (Dec1 = IsDecSP (E)) > 0 && + (N = CS_GetNextEntry (S, I)) != 0 && + (Dec2 = IsDecSP (N)) > 0 && + (Dec1 += Dec2) <= 255 && + !CE_HasLabel (N)) { + + CodeEntry* X; + char Buf[20]; + + /* We can combine the two */ + if (Dec1 <= 8) { + /* Insert a call to decsp */ + xsprintf (Buf, sizeof (Buf), "decsp%u", Dec1); + X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, N->LI); + CS_InsertEntry (S, X, I+2); + } else { + /* Insert a call to subysp */ + const char* Arg = MakeHexArg (Dec1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, N->LI); + CS_InsertEntry (S, X, I+2); + X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp", 0, N->LI); + CS_InsertEntry (S, X, I+3); + } + + /* Delete the old code */ + CS_DelEntries (S, I, 2); + + /* Regenerate register info */ + CS_GenRegInfo (S); + + /* Remember we had changes */ ++Changes; - } - /* Next entry */ - ++I; + } else { + + /* Next entry */ + ++I; + } } @@ -1388,25 +1352,37 @@ struct OptFunc { static OptFunc DOpt65C02BitOps = { Opt65C02BitOps, "Opt65C02BitOps", 66, 0, 0, 0, 0, 0 }; static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind", 100, 0, 0, 0, 0, 0 }; static OptFunc DOpt65C02Stores = { Opt65C02Stores, "Opt65C02Stores", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 125, 0, 0, 0, 0, 0 }; static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2", 200, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd4 = { OptAdd4, "OptAdd4", 90, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd5 = { OptAdd5, "OptAdd5", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd6 = { OptAdd6, "OptAdd6", 40, 0, 0, 0, 0, 0 }; static OptFunc DOptBoolTrans = { OptBoolTrans, "OptBoolTrans", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist", 0, 0, 0, 0, 
0, 0 }; -static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 85, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 75, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 42, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 85, 0, 0, 0, 0, 0 }; static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3", 75, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 75, 0, 0, 0, 0, 0 }; static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 85, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp9 = { OptCmp9, "OptCmp9", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches1= { OptCondBranches1,"OptCondBranches1", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches2= { OptCondBranches2,"OptCondBranches2", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDecouple = { OptDecouple, "OptDecouple", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads1 = { OptIndLoads1, "OptIndLoads1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads2 = { OptIndLoads2, "OptIndLoads2", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptJumpTarget = { OptJumpTarget, "OptJumpTarget", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget1 = { 
OptJumpTarget1, "OptJumpTarget1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget2 = { OptJumpTarget2, "OptJumpTarget2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget3 = { OptJumpTarget3, "OptJumpTarget3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad1 = { OptLoad1, "OptLoad1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad2 = { OptLoad2, "OptLoad2", 200, 0, 0, 0, 0, 0 }; static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 }; @@ -1416,26 +1392,50 @@ static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1", 100, 0, static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptNegAX3 = { OptNegAX3, "OptNegAX3", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptNegAX4 = { OptNegAX4, "OptNegAX4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPrecalc = { OptPrecalc, "OptPrecalc", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 140, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad11 = { OptPtrLoad11, "OptPtrLoad11", 92, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad12 = { OptPtrLoad12, "OptPtrLoad12", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad13 = { OptPtrLoad13, "OptPtrLoad13", 65, 
0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad14 = { OptPtrLoad14, "OptPtrLoad14", 108, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad15 = { OptPtrLoad15, "OptPtrLoad15", 86, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad16 = { OptPtrLoad16, "OptPtrLoad16", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad17 = { OptPtrLoad17, "OptPtrLoad17", 190, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore3 = { OptPtrStore3, "OptPtrStore3", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPush2 = { OptPush2, "OptPush2", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptPushPop = { OptPushPop, "OptPushPop", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 }; -/*static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 };*/ +static OptFunc DOptShift3 = { OptShift3, "OptShift3", 110, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift4 = { OptShift4, "OptShift4", 200, 0, 0, 0, 0, 0 }; +static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSize2 = { OptSize2, "OptSize2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStackPtrOps = { OptStackPtrOps, "OptStackPtrOps", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore1 = { OptStore1, "OptStore1", 70, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore2 = { OptStore2, "OptStore2", 115, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore3 = { OptStore3, "OptStore3", 120, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore4 = { OptStore4, "OptStore4", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore5 = { OptStore5, "OptStore5", 100, 0, 0, 
0, 0, 0 }; static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptTest1 = { OptTest1, "OptTest1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptTransfers = { OptTransfers, "OptTransfers", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub3 = { OptSub3, "OptSub3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest1 = { OptTest1, "OptTest1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest2 = { OptTest2, "OptTest2", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers3 = { OptTransfers3, "OptTransfers3", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers4 = { OptTransfers4, "OptTransfers4", 65, 0, 0, 0, 0, 0 }; static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores", 0, 0, 0, 0, 0, 0 }; @@ -1448,6 +1448,9 @@ static OptFunc* OptFuncs[] = { &DOptAdd1, &DOptAdd2, &DOptAdd3, + &DOptAdd4, + &DOptAdd5, + &DOptAdd6, &DOptBoolTrans, &DOptBranchDist, &DOptCmp1, @@ -1457,42 +1460,75 @@ static OptFunc* OptFuncs[] = { &DOptCmp5, &DOptCmp6, &DOptCmp7, - &DOptCondBranches, + &DOptCmp8, + &DOptCmp9, + &DOptCondBranches1, + &DOptCondBranches2, &DOptDeadCode, &DOptDeadJumps, &DOptDecouple, &DOptDupLoads, + &DOptIndLoads1, + &DOptIndLoads2, &DOptJumpCascades, - &DOptJumpTarget, + &DOptJumpTarget1, + &DOptJumpTarget2, + &DOptJumpTarget3, + &DOptLoad1, + &DOptLoad2, &DOptNegA1, &DOptNegA2, &DOptNegAX1, &DOptNegAX2, &DOptNegAX3, &DOptNegAX4, + &DOptPrecalc, &DOptPtrLoad1, + &DOptPtrLoad11, + &DOptPtrLoad12, + &DOptPtrLoad13, + &DOptPtrLoad14, + &DOptPtrLoad15, + &DOptPtrLoad16, + &DOptPtrLoad17, &DOptPtrLoad2, &DOptPtrLoad3, &DOptPtrLoad4, 
&DOptPtrLoad5, &DOptPtrLoad6, + &DOptPtrLoad7, &DOptPtrStore1, &DOptPtrStore2, - &DOptPush1, + &DOptPtrStore3, + &DOptPush1, + &DOptPush2, &DOptPushPop, &DOptRTS, &DOptRTSJumps1, &DOptRTSJumps2, &DOptShift1, &DOptShift2, - /*&DOptSize1,*/ + &DOptShift3, + &DOptShift4, + &DOptSize1, &DOptSize2, &DOptStackOps, + &DOptStackPtrOps, + &DOptStore1, + &DOptStore2, + &DOptStore3, + &DOptStore4, + &DOptStore5, &DOptStoreLoad, &DOptSub1, &DOptSub2, + &DOptSub3, &DOptTest1, - &DOptTransfers, + &DOptTest2, + &DOptTransfers1, + &DOptTransfers2, + &DOptTransfers3, + &DOptTransfers4, &DOptUnusedLoads, &DOptUnusedStores, }; @@ -1663,19 +1699,19 @@ static void WriteOptStats (const char* Name) /* Write a header */ fprintf (F, - "; Optimizer Total Last Total Last\n" - "; Step Runs Runs Chg Chg\n"); + "; Optimizer Total Last Total Last\n" + "; Step Runs Runs Chg Chg\n"); /* Write the data */ for (I = 0; I < OPTFUNC_COUNT; ++I) { const OptFunc* O = OptFuncs[I]; fprintf (F, - "%-20s %6lu %6lu %6lu %6lu\n", + "%-20s %10lu %10lu %10lu %10lu\n", O->Name, O->TotalRuns, O->LastRuns, - O->TotalChanges, + O->TotalChanges, O->LastChanges); } @@ -1693,7 +1729,7 @@ static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max) /* Don't run the function if it is disabled or if it is prohibited by the * code size factor */ - if (F->Disabled || CodeSizeFactor < F->CodeSizeFactor) { + if (F->Disabled || F->CodeSizeFactor > S->CodeSizeFactor) { return 0; } @@ -1728,21 +1764,46 @@ static unsigned RunOptGroup1 (CodeSeg* S) { unsigned Changes = 0; + Changes += RunOptFunc (S, &DOptStackPtrOps, 5); Changes += RunOptFunc (S, &DOptPtrStore1, 1); Changes += RunOptFunc (S, &DOptPtrStore2, 1); + Changes += RunOptFunc (S, &DOptPtrStore3, 1); + Changes += RunOptFunc (S, &DOptAdd3, 1); /* Before OptPtrLoad5! 
*/ Changes += RunOptFunc (S, &DOptPtrLoad1, 1); Changes += RunOptFunc (S, &DOptPtrLoad2, 1); Changes += RunOptFunc (S, &DOptPtrLoad3, 1); Changes += RunOptFunc (S, &DOptPtrLoad4, 1); Changes += RunOptFunc (S, &DOptPtrLoad5, 1); + Changes += RunOptFunc (S, &DOptPtrLoad6, 1); + Changes += RunOptFunc (S, &DOptPtrLoad7, 1); + Changes += RunOptFunc (S, &DOptPtrLoad11, 1); + Changes += RunOptFunc (S, &DOptPtrLoad12, 1); + Changes += RunOptFunc (S, &DOptPtrLoad13, 1); + Changes += RunOptFunc (S, &DOptPtrLoad14, 1); + Changes += RunOptFunc (S, &DOptPtrLoad15, 1); + Changes += RunOptFunc (S, &DOptPtrLoad16, 1); + Changes += RunOptFunc (S, &DOptPtrLoad17, 1); Changes += RunOptFunc (S, &DOptNegAX1, 1); Changes += RunOptFunc (S, &DOptNegAX2, 1); Changes += RunOptFunc (S, &DOptNegAX3, 1); Changes += RunOptFunc (S, &DOptNegAX4, 1); Changes += RunOptFunc (S, &DOptAdd1, 1); Changes += RunOptFunc (S, &DOptAdd2, 1); + Changes += RunOptFunc (S, &DOptAdd4, 1); + Changes += RunOptFunc (S, &DOptAdd5, 1); + Changes += RunOptFunc (S, &DOptAdd6, 1); + Changes += RunOptFunc (S, &DOptAdd6, 1); + Changes += RunOptFunc (S, &DOptSub1, 1); + Changes += RunOptFunc (S, &DOptSub3, 1); + Changes += RunOptFunc (S, &DOptStore4, 1); + Changes += RunOptFunc (S, &DOptStore5, 1); Changes += RunOptFunc (S, &DOptShift1, 1); Changes += RunOptFunc (S, &DOptShift2, 1); + Changes += RunOptFunc (S, &DOptShift3, 1); + Changes += RunOptFunc (S, &DOptShift4, 1); + Changes += RunOptFunc (S, &DOptStore1, 1); + Changes += RunOptFunc (S, &DOptStore2, 5); + Changes += RunOptFunc (S, &DOptStore3, 5); /* Return the number of changes */ return Changes; @@ -1779,21 +1840,24 @@ static unsigned RunOptGroup3 (CodeSeg* S) do { C = 0; - C += RunOptFunc (S, &DOptPtrLoad6, 1); C += RunOptFunc (S, &DOptNegA1, 1); C += RunOptFunc (S, &DOptNegA2, 1); + C += RunOptFunc (S, &DOptStackOps, 1); C += RunOptFunc (S, &DOptSub1, 1); C += RunOptFunc (S, &DOptSub2, 1); - C += RunOptFunc (S, &DOptAdd3, 1); - C += RunOptFunc (S, &DOptStackOps, 
1); + C += RunOptFunc (S, &DOptSub3, 1); + C += RunOptFunc (S, &DOptAdd5, 1); + C += RunOptFunc (S, &DOptAdd6, 1); C += RunOptFunc (S, &DOptJumpCascades, 1); C += RunOptFunc (S, &DOptDeadJumps, 1); C += RunOptFunc (S, &DOptRTS, 1); C += RunOptFunc (S, &DOptDeadCode, 1); - C += RunOptFunc (S, &DOptJumpTarget, 1); - C += RunOptFunc (S, &DOptCondBranches, 1); - C += RunOptFunc (S, &DOptRTSJumps1, 1); C += RunOptFunc (S, &DOptBoolTrans, 1); + C += RunOptFunc (S, &DOptJumpTarget1, 1); + C += RunOptFunc (S, &DOptJumpTarget2, 1); + C += RunOptFunc (S, &DOptCondBranches1, 1); + C += RunOptFunc (S, &DOptCondBranches2, 1); + C += RunOptFunc (S, &DOptRTSJumps1, 1); C += RunOptFunc (S, &DOptCmp1, 1); C += RunOptFunc (S, &DOptCmp2, 1); C += RunOptFunc (S, &DOptCmp3, 1); @@ -1801,13 +1865,22 @@ static unsigned RunOptGroup3 (CodeSeg* S) C += RunOptFunc (S, &DOptCmp5, 1); C += RunOptFunc (S, &DOptCmp6, 1); C += RunOptFunc (S, &DOptCmp7, 1); + C += RunOptFunc (S, &DOptCmp8, 1); + C += RunOptFunc (S, &DOptCmp9, 1); C += RunOptFunc (S, &DOptTest1, 1); + C += RunOptFunc (S, &DOptLoad1, 1); + C += RunOptFunc (S, &DOptJumpTarget3, 1); /* After OptCondBranches2 */ C += RunOptFunc (S, &DOptUnusedLoads, 1); C += RunOptFunc (S, &DOptUnusedStores, 1); C += RunOptFunc (S, &DOptDupLoads, 1); C += RunOptFunc (S, &DOptStoreLoad, 1); - C += RunOptFunc (S, &DOptTransfers, 1); + C += RunOptFunc (S, &DOptTransfers1, 1); + C += RunOptFunc (S, &DOptTransfers3, 1); + C += RunOptFunc (S, &DOptTransfers4, 1); + C += RunOptFunc (S, &DOptStore1, 1); + C += RunOptFunc (S, &DOptStore5, 1); C += RunOptFunc (S, &DOptPushPop, 1); + C += RunOptFunc (S, &DOptPrecalc, 1); Changes += C; @@ -1820,11 +1893,32 @@ static unsigned RunOptGroup3 (CodeSeg* S) static unsigned RunOptGroup4 (CodeSeg* S) +/* Run another round of pattern replacements. These are done late, since there + * may be better replacements before. 
+ */ +{ + unsigned Changes = 0; + + /* Repeat some of the steps here */ + Changes += RunOptFunc (S, &DOptPush1, 1); + Changes += RunOptFunc (S, &DOptPush2, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptTest2, 1); + Changes += RunOptFunc (S, &DOptTransfers2, 1); + Changes += RunOptFunc (S, &DOptLoad2, 1); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup5 (CodeSeg* S) /* 65C02 specific optimizations. */ { unsigned Changes = 0; - if (CPU >= CPU_65C02) { + if (CPUIsets[CPU] & CPU_ISET_65SC02) { Changes += RunOptFunc (S, &DOpt65C02BitOps, 1); Changes += RunOptFunc (S, &DOpt65C02Ind, 1); Changes += RunOptFunc (S, &DOpt65C02Stores, 1); @@ -1843,14 +1937,25 @@ static unsigned RunOptGroup4 (CodeSeg* S) -static unsigned RunOptGroup5 (CodeSeg* S) -/* Run another round of pattern replacements. These are done late, since there - * may be better replacements before. +static unsigned RunOptGroup6 (CodeSeg* S) +/* This one is quite special. It tries to replace "lda (sp),y" by "lda (sp,x)". + * The latter is one cycle slower, but if we're able to remove the necessary + * load of the Y register, because X is zero anyway, we gain 1 cycle and + * shorten the code by one (transfer) or two bytes (load). So what we do is + * to replace the insns, remove unused loads, and then change back all insns + * where Y is still zero (meaning that the load has not been removed). */ { unsigned Changes = 0; - Changes += RunOptFunc (S, &DOptPush1, 1); + /* This group will only run for a standard 6502, because the 65C02 has a + * better addressing mode that covers this case.
+ */ + if ((CPUIsets[CPU] & CPU_ISET_65SC02) == 0) { + Changes += RunOptFunc (S, &DOptIndLoads1, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptIndLoads2, 1); + } /* Return the number of changes */ return Changes; @@ -1858,7 +1963,7 @@ static unsigned RunOptGroup5 (CodeSeg* S) -static unsigned RunOptGroup6 (CodeSeg* S) +static unsigned RunOptGroup7 (CodeSeg* S) /* The last group of optimization steps. Adjust branches, do size optimizations. */ { @@ -1869,12 +1974,29 @@ static unsigned RunOptGroup6 (CodeSeg* S) * if this does hinder further optimizations (no problem since we're * done soon). */ - Changes += RunOptFunc (S, &DOptSize2, 1); + C = RunOptFunc (S, &DOptSize1, 1); + if (C) { + Changes += C; + /* Run some optimization passes again, since the size optimizations + * may have opened new opportunities. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptUnusedStores, 1); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); + Changes += RunOptFunc (S, &DOptStore5, 1); + } - /* Run the jump target optimization again, since the size optimization - * above may have opened new oportunities. - */ - Changes += RunOptFunc (S, &DOptJumpTarget, 5); + C = RunOptFunc (S, &DOptSize2, 1); + if (C) { + Changes += C; + /* Run some optimization passes again, since the size optimizations + * may have opened new opportunities.
+ */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); + Changes += RunOptFunc (S, &DOptStore5, 1); + Changes += RunOptFunc (S, &DOptTransfers3, 1); + } /* Adjust branch distances */ Changes += RunOptFunc (S, &DOptBranchDist, 3); @@ -1900,7 +2022,7 @@ void RunOpt (CodeSeg* S) const char* StatFileName; /* If we shouldn't run the optimizer, bail out */ - if (!Optimize) { + if (!S->Optimize) { return; } @@ -1924,6 +2046,7 @@ void RunOpt (CodeSeg* S) RunOptGroup4 (S); RunOptGroup5 (S); RunOptGroup6 (S); + RunOptGroup7 (S); /* Write statistics */ if (StatFileName) {