X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fcodeopt.c;h=e26e747f42c2d8451d847e85015fbed322fac581;hb=d6c50c18267881ae4ad4117edeb81e6c36d33f44;hp=48834fed8c330b09058067f55afd02873bb1764a;hpb=1a39515769d86103a40fe3b5cd7d83e7ed1bcae4;p=cc65 diff --git a/src/cc65/codeopt.c b/src/cc65/codeopt.c index 48834fed8..e26e747f4 100644 --- a/src/cc65/codeopt.c +++ b/src/cc65/codeopt.c @@ -1,4 +1,3 @@ - /*****************************************************************************/ /* */ /* codeopt.c */ @@ -7,9 +6,9 @@ /* */ /* */ /* */ -/* (C) 2001 Ullrich von Bassewitz */ -/* Wacholderweg 14 */ -/* D-70597 Stuttgart */ +/* (C) 2001-2003 Ullrich von Bassewitz */ +/* Römerstraße 52 */ +/* D-70794 Filderstadt */ /* EMail: uz@cc65.org */ /* */ /* */ @@ -34,19 +33,31 @@ +#include #include /* common */ #include "abend.h" #include "chartype.h" +#include "cpu.h" #include "print.h" -#include "xsprintf.h" +#include "xmalloc.h" /* cc65 */ #include "asmlabel.h" #include "codeent.h" #include "codeinfo.h" +#include "coptadd.h" +#include "coptc02.h" +#include "coptcmp.h" #include "coptind.h" +#include "coptneg.h" +#include "coptpush.h" +#include "coptsize.h" +#include "coptstop.h" +#include "coptstore.h" +#include "coptsub.h" +#include "copttest.h" #include "error.h" #include "global.h" #include "codeopt.h" @@ -54,335 +65,319 @@ /*****************************************************************************/ -/* Data */ +/* Data */ /*****************************************************************************/ -/* Defines for the conditions in a compare */ -typedef enum { - CMP_INV = -1, - CMP_EQ, - CMP_NE, - CMP_GT, - CMP_GE, - CMP_LT, - CMP_LE, - CMP_UGT, - CMP_UGE, - CMP_ULT, - CMP_ULE -} cmp_t; - -/* Table with the compare suffixes */ -static const char CmpSuffixTab [][4] = { - "eq", "ne", "gt", "ge", "lt", "le", "ugt", "uge", "ult", "ule" -}; - -/* Table used to invert a condition, indexed by condition */ -static const unsigned char CmpInvertTab [] = { - CMP_NE, CMP_EQ, - CMP_LE, CMP_LT, CMP_GE, CMP_GT, - CMP_ULE, CMP_ULT, CMP_UGE, CMP_UGT -}; - -/* Table to show which compares are signed (use the N flag) */ -static const char CmpSignedTab [] = { - 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 +/* Shift types */ +enum { + SHIFT_NONE, + SHIFT_ASR_1, + SHIFT_ASL_1, + SHIFT_LSR_1, + SHIFT_LSL_1 }; /*****************************************************************************/ -/* Helper functions */ +/* Optimize shifts */ /*****************************************************************************/ -static cmp_t FindCmpCond (const char* Code, unsigned CodeLen) -/* Search for a compare condition by the given code using the given length */ +static unsigned OptShift1 (CodeSeg* S) +/* A call to the shlaxN routine may get replaced by one or more asl insns + * if the value of X is not used later. + */ { - unsigned I; + unsigned Changes = 0; - /* Linear search */ - for (I = 0; I < sizeof (CmpSuffixTab) / sizeof (CmpSuffixTab [0]); ++I) { - if (strncmp (Code, CmpSuffixTab [I], CodeLen) == 0) { - /* Found */ - return I; - } - } + /* Walk over the entries */ + unsigned I = 0; + while (I < CS_GetEntryCount (S)) { - /* Not found */ - return CMP_INV; -} + /* Get next entry */ + CodeEntry* E = CS_GetEntry (S, I); + /* Check for the sequence */ + if (E->OPC == OP65_JSR && + (strncmp (E->Arg, "shlax", 5) == 0 || + strncmp (E->Arg, "aslax", 5) == 0) && + strlen (E->Arg) == 6 && + IsDigit (E->Arg[5]) && + !RegXUsed (S, I+1)) { + /* Insert shift insns */ + unsigned Count = E->Arg[5] - '0'; + while (Count--) { + CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); + } -static cmp_t FindBoolCmpCond (const char* Name) -/* Map a condition suffix to a code. Return the code or CMP_INV on failure */ -{ - /* Check for the correct subroutine name */ - if (strncmp (Name, "bool", 4) == 0) { - /* Name is ok, search for the code in the table */ - return FindCmpCond (Name+4, strlen(Name)-4); - } else { - /* Not found */ - return CMP_INV; - } -} + /* Delete the call to shlax */ + CS_DelEntry (S, I); + /* Remember, we had changes */ + ++Changes; + } -static cmp_t FindTosCmpCond (const char* Name) -/* Check if this is a call to one of the TOS compare functions (tosgtax). - * Return the condition code or CMP_INV on failure. - */ -{ - unsigned Len = strlen (Name); + /* Next entry */ + ++I; - /* Check for the correct subroutine name */ - if (strncmp (Name, "tos", 3) == 0 && strcmp (Name+Len-2, "ax") == 0) { - /* Name is ok, search for the code in the table */ - return FindCmpCond (Name+3, Len-3-2); - } else { - /* Not found */ - return CMP_INV; } + + /* Return the number of changes made */ + return Changes; } -static void ReplaceCmp (CodeSeg* S, unsigned I, cmp_t Cond) -/* Helper function for the replacement of routines that return a boolean - * followed by a conditional jump. Instead of the boolean value, the condition - * codes are evaluated directly. - * I is the index of the conditional branch, the sequence is already checked - * to be correct. +static unsigned OptShift2 (CodeSeg* S) +/* A call to the shraxN routine may get replaced by one or more lsr insns + * if the value of X is zero. */ { - CodeEntry* N; - CodeLabel* L; + unsigned Changes = 0; + unsigned I; - /* Get the entry */ - CodeEntry* E = CS_GetEntry (S, I); + /* Generate register info */ + CS_GenRegInfo (S); - /* Replace the conditional branch */ - switch (Cond) { + /* Walk over the entries */ + I = 0; + while (I < CS_GetEntryCount (S)) { - case CMP_EQ: - CE_ReplaceOPC (E, OP65_JEQ); - break; + /* Get next entry */ + CodeEntry* E = CS_GetEntry (S, I); - case CMP_NE: - CE_ReplaceOPC (E, OP65_JNE); - break; + /* Check for the sequence */ + if (E->OPC == OP65_JSR && + strncmp (E->Arg, "shrax", 5) == 0 && + strlen (E->Arg) == 6 && + IsDigit (E->Arg[5]) && + E->RI->In.RegX == 0) { - case CMP_GT: - /* Replace by - * beq @L - * jpl Target - * @L: ... - */ - if ((N = CS_GetNextEntry (S, I)) == 0) { - /* No such entry */ - Internal ("Invalid program flow"); - } - L = CS_GenLabel (S, N); - N = NewCodeEntry (OP65_BEQ, AM65_BRA, L->Name, L, E->LI); - CS_InsertEntry (S, N, I); - CE_ReplaceOPC (E, OP65_JPL); - break; - - case CMP_GE: - CE_ReplaceOPC (E, OP65_JPL); - break; - - case CMP_LT: - CE_ReplaceOPC (E, OP65_JMI); - break; - - case CMP_LE: - /* Replace by - * jmi Target - * jeq Target - */ - CE_ReplaceOPC (E, OP65_JMI); - L = E->JumpTo; - N = NewCodeEntry (OP65_JEQ, AM65_BRA, L->Name, L, E->LI); - CS_InsertEntry (S, N, I+1); - break; - - case CMP_UGT: - /* Replace by - * beq @L - * jcs Target - * @L: ... - */ - if ((N = CS_GetNextEntry (S, I)) == 0) { - /* No such entry */ - Internal ("Invalid program flow"); + /* Insert shift insns */ + unsigned Count = E->Arg[5] - '0'; + while (Count--) { + CodeEntry* X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); } - L = CS_GenLabel (S, N); - N = NewCodeEntry (OP65_BEQ, AM65_BRA, L->Name, L, E->LI); - CS_InsertEntry (S, N, I); - CE_ReplaceOPC (E, OP65_JCS); - break; - - case CMP_UGE: - CE_ReplaceOPC (E, OP65_JCS); - break; - - case CMP_ULT: - CE_ReplaceOPC (E, OP65_JCC); - break; - - case CMP_ULE: - /* Replace by - * jcc Target - * jeq Target - */ - CE_ReplaceOPC (E, OP65_JCC); - L = E->JumpTo; - N = NewCodeEntry (OP65_JEQ, AM65_BRA, L->Name, L, E->LI); - CS_InsertEntry (S, N, I+1); - break; - default: - Internal ("Unknown jump condition: %d", Cond); + /* Delete the call to shlax */ + CS_DelEntry (S, I); - } + /* Remember, we had changes */ + ++Changes; -} + } + + /* Next entry */ + ++I; + } + /* Free the register info */ + CS_FreeRegInfo (S); -static int IsCmpToZero (const CodeEntry* E) -/* Check if the given instrcuction is a compare to zero instruction */ -{ - return (E->OPC == OP65_CMP && - E->AM == AM65_IMM && - (E->Flags & CEF_NUMARG) != 0 && - E->Num == 0); + /* Return the number of changes made */ + return Changes; } -static int IsSpLoad (const CodeEntry* E) -/* Return true if this is the load of A from the stack */ +static unsigned GetShiftType (const char* Sub) +/* Helper function for OptShift3 */ { - return E->OPC == OP65_LDA && E->AM == AM65_ZP_INDY && strcmp (E->Arg, "sp") == 0; + if (*Sub == 'a') { + if (strcmp (Sub+1, "slax1") == 0) { + return SHIFT_ASL_1; + } else if (strcmp (Sub+1, "srax1") == 0) { + return SHIFT_ASR_1; + } + } else if (*Sub == 's') { + if (strcmp (Sub+1, "hlax1") == 0) { + return SHIFT_LSL_1; + } else if (strcmp (Sub+1, "hrax1") == 0) { + return SHIFT_LSR_1; + } + } + return SHIFT_NONE; } -static int IsLocalLoad16 (CodeSeg* S, unsigned Index, - CodeEntry** L, unsigned Count) -/* Check if a 16 bit load of a local variable follows: +static unsigned OptShift3 (CodeSeg* S) +/* Search for the sequence * - * ldy #$xx - * lda (sp),y - * tax - * dey - * lda (sp),y + * lda xxx + * ldx yyy + * jsr aslax1/asrax1/shlax1/shrax1 + * sta aaa + * stx bbb + * + * and replace it by + * + * lda xxx + * asl a + * sta aaa + * lda yyy + * rol a + * sta bbb * - * If so, read Count entries following the first ldy into L and return true - * if this is possible. Otherwise return false. + * or similar, provided that a/x is not used later */ { - /* Be sure we read enough entries for the check */ - CHECK (Count >= 5); - - /* Read the first entry */ - L[0] = CS_GetEntry (S, Index); - - /* Check for the sequence */ - return (L[0]->OPC == OP65_LDY && - L[0]->AM == AM65_IMM && - (L[0]->Flags & CEF_NUMARG) != 0 && - CS_GetEntries (S, L+1, Index+1, Count-1) && - IsSpLoad (L[1]) && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TAX && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_DEY && - !CE_HasLabel (L[3]) && - IsSpLoad (L[4]) && - !CE_HasLabel (L[4])); -} + unsigned Changes = 0; + /* Walk over the entries */ + unsigned I = 0; + while (I < CS_GetEntryCount (S)) { + unsigned ShiftType; + CodeEntry* L[5]; -static int IsImmCmp16 (CodeSeg* S, CodeEntry** L) -/* Check if the instructions at L are an immidiate compare of a/x: - * - * - */ -{ - return (L[0]->OPC == OP65_CPX && - L[0]->AM == AM65_IMM && - (L[0]->Flags & CEF_NUMARG) != 0 && - !CE_HasLabel (L[0]) && - (L[1]->OPC == OP65_JNE || L[1]->OPC == OP65_BNE) && - L[1]->JumpTo != 0 && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_CMP && - L[2]->AM == AM65_IMM && - (L[2]->Flags & CEF_NUMARG) != 0 && - (L[3]->Info & OF_ZBRA) != 0 && - L[3]->JumpTo != 0 && - (L[1]->JumpTo->Owner == L[3] || L[1]->JumpTo == L[3]->JumpTo)); + /* Get next entry */ + L[0] = CS_GetEntry (S, I); + + /* Check for the sequence */ + if (L[0]->OPC == OP65_LDA && + (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) && + CS_GetEntries (S, L+1, I+1, 4) && + !CS_RangeHasLabel (S, I+1, 4) && + L[1]->OPC == OP65_LDX && + (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) && + L[2]->OPC == OP65_JSR && + (ShiftType = GetShiftType (L[2]->Arg)) != SHIFT_NONE&& + L[3]->OPC == OP65_STA && + (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && + L[4]->OPC == OP65_STX && + (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) && + !RegAXUsed (S, I+5)) { + + CodeEntry* X; + + /* Handle the four shift types differently */ + switch (ShiftType) { + + case SHIFT_ASR_1: + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+5); + X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI); + CS_InsertEntry (S, X, I+6); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+7); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+8); + X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+9); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+10); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+11); + CS_DelEntries (S, I, 5); + break; + + case SHIFT_LSR_1: + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+5); + X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+6); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+7); + X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+8); + X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+9); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+10); + CS_DelEntries (S, I, 5); + break; + + case SHIFT_LSL_1: + case SHIFT_ASL_1: + /* These two are identical */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+1); + X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+2); + X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+3); + X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI); + CS_InsertEntry (S, X, I+4); + X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+5); + CS_DelEntries (S, I+6, 4); + break; + + } + + /* Remember, we had changes */ + ++Changes; + + } + + /* Next entry */ + ++I; + + } + + /* Return the number of changes made */ + return Changes; } /*****************************************************************************/ -/* Remove calls to the bool transformer subroutines */ +/* Optimize loads */ /*****************************************************************************/ -static unsigned OptBoolTransforms (CodeSeg* S) -/* Try to remove the call to boolean transformer routines where the call is - * not really needed. +static unsigned OptLoad1 (CodeSeg* S) +/* Search for a call to ldaxysp where X is not used later and replace it by + * a load of just the A register. */ { + unsigned I; unsigned Changes = 0; + /* Generate register info */ + CS_GenRegInfo (S); + /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* N; - cmp_t Cond; + CodeEntry* E; /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + E = CS_GetEntry (S, I); - /* Check for a boolean transformer */ - if (E->OPC == OP65_JSR && - (Cond = FindBoolCmpCond (E->Arg)) != CMP_INV && - (N = CS_GetNextEntry (S, I)) != 0 && - (N->Info & OF_ZBRA) != 0) { - - /* Make the boolean transformer unnecessary by changing the - * the conditional jump to evaluate the condition flags that - * are set after the compare directly. Note: jeq jumps if - * the condition is not met, jne jumps if the condition is met. - * Invert the code if we jump on condition not met. - */ - if (GetBranchCond (N->OPC) == BC_EQ) { - /* Jumps if condition false, invert condition */ - Cond = CmpInvertTab [Cond]; - } + /* Check for the sequence */ + if (CE_IsCallTo (E, "ldaxysp") && + RegValIsKnown (E->RI->In.RegY) && + !RegXUsed (S, I+1)) { + + CodeEntry* X; - /* Check if we can replace the code by something better */ - ReplaceCmp (S, I+1, Cond); + /* Reload the Y register */ + const char* Arg = MakeHexArg (E->RI->In.RegY - 1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + CS_InsertEntry (S, X, I+1); - /* Remove the call to the bool transformer */ + /* Load from stack */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+2); + + /* Now remove the call to the subroutine */ CS_DelEntry (S, I); /* Remember, we had changes */ - ++Changes; + ++Changes; } @@ -391,6 +386,9 @@ static unsigned OptBoolTransforms (CodeSeg* S) } + /* Free the register info */ + CS_FreeRegInfo (S); + /* Return the number of changes made */ return Changes; } @@ -398,20 +396,83 @@ static unsigned OptBoolTransforms (CodeSeg* S) /*****************************************************************************/ -/* Optimize subtractions */ +/* Optimize stores through pointers */ /*****************************************************************************/ -static unsigned OptSub1 (CodeSeg* S) -/* Search for the sequence +static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L) +/* Check if this is one of the allowed suboperation for OptPtrStore1 */ +{ + /* Check for a label attached to the entry */ + if (CE_HasLabel (L[0])) { + return 0; + } + + /* Check for single insn sub ops */ + if (L[0]->OPC == OP65_AND || + L[0]->OPC == OP65_EOR || + L[0]->OPC == OP65_ORA || + (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) || + (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) { + + /* One insn */ + return 1; + + } else if (L[0]->OPC == OP65_CLC && + (L[1] = CS_GetNextEntry (S, I)) != 0 && + L[1]->OPC == OP65_ADC && + !CE_HasLabel (L[1])) { + return 2; + } else if (L[0]->OPC == OP65_SEC && + (L[1] = CS_GetNextEntry (S, I)) != 0 && + L[1]->OPC == OP65_SBC && + !CE_HasLabel (L[1])) { + return 2; + } + + + + /* Not found */ + return 0; +} + + + +static unsigned OptPtrStore1 (CodeSeg* S) +/* Search for the sequence: + * + * jsr pushax + * ldy xxx + * jsr ldauidx + * subop + * ldy yyy + * jsr staspidx * - * sbc ... - * bcs L - * dex - * L: + * and replace it by: + * + * sta ptr1 + * stx ptr1+1 + * ldy xxx + * ldx #$00 + * lda (ptr1),y + * subop + * ldy yyy + * sta (ptr1),y * - * and remove the handling of the high byte if X is not used later. + * In case a/x is loaded from the register bank before the pushax, we can even + * use the register bank instead of ptr1. + */ +/* + * jsr pushax + * ldy xxx + * jsr ldauidx + * ldx #$00 + * lda (zp),y + * subop + * ldy yyy + * sta (zp),y + * jsr staspidx */ { unsigned Changes = 0; @@ -420,32 +481,97 @@ static unsigned OptSub1 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[3]; + unsigned K; + CodeEntry* L[10]; /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (E->OPC == OP65_SBC && - CS_GetEntries (S, L, I+1, 3) && - (L[0]->OPC == OP65_BCS || L[0]->OPC == OP65_JCS) && - L[0]->JumpTo != 0 && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_DEX && - !CE_HasLabel (L[1]) && - L[0]->JumpTo->Owner == L[2] && - !RegXUsed (S, I+3)) { - - /* Remove the bcs/dex */ - CS_DelEntries (S, I+1, 2); + if (CE_IsCallTo (L[0], "pushax") && + CS_GetEntries (S, L+1, I+1, 3) && + L[1]->OPC == OP65_LDY && + CE_KnownImm (L[1]) && + !CE_HasLabel (L[1]) && + CE_IsCallTo (L[2], "ldauidx") && + !CE_HasLabel (L[2]) && + (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 && + CS_GetEntries (S, L+3+K, I+3+K, 2) && + L[3+K]->OPC == OP65_LDY && + CE_KnownImm (L[3+K]) && + !CE_HasLabel (L[3+K]) && + CE_IsCallTo (L[4+K], "staspidx") && + !CE_HasLabel (L[4+K])) { - /* Remember, we had changes */ - ++Changes; - } + const char* RegBank = 0; + const char* ZPLoc = "ptr1"; + CodeEntry* X; - /* Next entry */ - ++I; + + /* Get the preceeding two instructions and check them. We check + * for: + * lda regbank+n + * ldx regbank+n+1 + */ + if (I > 1) { + CodeEntry* P[2]; + P[0] = CS_GetEntry (S, I-2); + P[1] = CS_GetEntry (S, I-1); + if (P[0]->OPC == OP65_LDA && + P[0]->AM == AM65_ZP && + P[1]->OPC == OP65_LDX && + P[1]->AM == AM65_ZP && + !CE_HasLabel (P[1]) && + strncmp (P[0]->Arg, "regbank+", 8) == 0) { + + unsigned Len = strlen (P[0]->Arg); + + if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 && + P[1]->Arg[Len+0] == '+' && + P[1]->Arg[Len+1] == '1' && + P[1]->Arg[Len+2] == '\0') { + + /* Ok, found. Use the name of the register bank */ + RegBank = ZPLoc = P[0]->Arg; + } + } + } + + /* Insert the load via the zp pointer */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); + CS_InsertEntry (S, X, I+3); + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI); + CS_InsertEntry (S, X, I+4); + + /* Insert the store through the zp pointer */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI); + CS_InsertEntry (S, X, I+6+K); + + /* Delete the old code */ + CS_DelEntry (S, I+7+K); /* jsr spaspidx */ + CS_DelEntry (S, I+2); /* jsr ldauidx */ + + /* Create and insert the stores into the zp pointer if needed */ + if (RegBank == 0) { + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+1); + X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+2); + } + + /* Delete more old code. Do it here to keep a label attached to + * entry I in place. + */ + CS_DelEntry (S, I); /* jsr pushax */ + + /* Remember, we had changes */ + ++Changes; + + } + + /* Next entry */ + ++I; } @@ -455,22 +581,27 @@ static unsigned OptSub1 (CodeSeg* S) -static unsigned OptSub2 (CodeSeg* S) -/* Search for the sequence +static unsigned OptPtrStore2 (CodeSeg* S) +/* Search for the sequence: * - * lda xx - * sec - * sta tmp1 - * lda yy - * sbc tmp1 - * sta yy + * lda #<(label+0) + * ldx #>(label+0) + * clc + * adc xxx + * bcc L + * inx + * L: jsr pushax + * ldx #$00 + * lda yyy + * ldy #$00 + * jsr staspidx * - * and replace it by + * and replace it by: * - * sec - * lda yy - * sbc xx - * sta yy + * ldy xxx + * ldx #$00 + * lda yyy + * sta label,y */ { unsigned Changes = 0; @@ -479,49 +610,68 @@ static unsigned OptSub2 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[5]; + CodeEntry* L[11]; + unsigned Len; /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (E->OPC == OP65_LDA && - CS_GetEntries (S, L, I+1, 5) && - L[0]->OPC == OP65_SEC && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_STA && - strcmp (L[1]->Arg, "tmp1") == 0 && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_SBC && - strcmp (L[3]->Arg, "tmp1") == 0 && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_STA && - strcmp (L[4]->Arg, L[2]->Arg) == 0 && - !CE_HasLabel (L[4])) { - - /* Remove the store to tmp1 */ - CS_DelEntry (S, I+2); - - /* Remove the subtraction */ - CS_DelEntry (S, I+3); + if (L[0]->OPC == OP65_LDA && + L[0]->AM == AM65_IMM && + CS_GetEntries (S, L+1, I+1, 10) && + L[1]->OPC == OP65_LDX && + L[1]->AM == AM65_IMM && + L[2]->OPC == OP65_CLC && + L[3]->OPC == OP65_ADC && + (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && + (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) && + L[4]->JumpTo != 0 && + L[4]->JumpTo->Owner == L[6] && + L[5]->OPC == OP65_INX && + CE_IsCallTo (L[6], "pushax") && + L[7]->OPC == OP65_LDX && + L[8]->OPC == OP65_LDA && + L[9]->OPC == OP65_LDY && + CE_KnownImm (L[9]) && + L[9]->Num == 0 && + CE_IsCallTo (L[10], "staspidx") && + !CS_RangeHasLabel (S, I+1, 5) && + !CS_RangeHasLabel (S, I+7, 4) && + /* Check the label last because this is quite costly */ + (Len = strlen (L[0]->Arg)) > 3 && + L[0]->Arg[0] == '<' && + L[0]->Arg[1] == '(' && + strlen (L[1]->Arg) == Len && + L[1]->Arg[0] == '>' && + memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { - /* Move the lda to the position of the subtraction and change the - * op to SBC. - */ - CS_MoveEntry (S, I, I+3); - CE_ReplaceOPC (E, OP65_SBC); + CodeEntry* X; + char* Label; - /* If the sequence head had a label, move this label back to the - * head. + /* We will create all the new stuff behind the current one so + * we keep the line references. */ - if (CE_HasLabel (E)) { - CS_MoveLabels (S, E, L[0]); - } + X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+11); + + X = NewCodeEntry (OP65_LDX, L[7]->AM, L[7]->Arg, 0, L[7]->LI); + CS_InsertEntry (S, X, I+12); + + X = NewCodeEntry (OP65_LDA, L[8]->AM, L[8]->Arg, 0, L[8]->LI); + CS_InsertEntry (S, X, I+13); + + Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); + Label[Len-3] = '\0'; + X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[10]->LI); + CS_InsertEntry (S, X, I+14); + xfree (Label); + + /* Remove the old code */ + CS_DelEntries (S, I, 11); /* Remember, we had changes */ - ++Changes; + ++Changes; } @@ -537,1001 +687,35 @@ static unsigned OptSub2 (CodeSeg* S) /*****************************************************************************/ -/* Optimize additions */ +/* Optimize loads through pointers */ /*****************************************************************************/ -static unsigned OptAdd1 (CodeSeg* S) -/* Search for the sequence - * - * jsr pushax - * ldy xxx - * ldx #$00 - * lda (sp),y - * jsr tosaddax - * - * and replace it by: - * - * ldy xxx-2 - * clc - * adc (sp),y - * bcc L - * inx - * L: - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[5]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_JSR && - strcmp (E->Arg, "pushax") == 0 && - CS_GetEntries (S, L, I+1, 5) && - L[0]->OPC == OP65_LDY && - CE_KnownImm (L[0]) && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_LDX && - CE_KnownImm (L[1]) && - L[1]->Num == 0 && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_JSR && - strcmp (L[3]->Arg, "tosaddax") == 0 && - !CE_HasLabel (L[3])) { - - CodeEntry* X; - CodeLabel* Label; - char Buf [16]; - - /* Remove the call to pushax */ - CS_DelEntry (S, I); - - /* Correct the stack offset (needed since pushax was removed) */ - L[0]->Num -= 2; - xsprintf (Buf, sizeof (Buf), "$%02lX", L[0]->Num); - CE_SetArg (L[0], Buf); - - /* Add the clc . */ - X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, L[3]->LI); - CS_InsertEntry (S, X, I+1); - - /* Remove the load */ - CS_DelEntry (S, I+3); /* lda */ - CS_DelEntry (S, I+2); /* ldx */ - - /* Add the adc */ - X = NewCodeEntry (OP65_ADC, AM65_ZP_INDY, "sp", 0, L[3]->LI); - CS_InsertEntry (S, X, I+2); - - /* Generate the branch label and the branch */ - Label = CS_GenLabel (S, L[4]); - X = NewCodeEntry (OP65_BCC, AM65_BRA, Label->Name, Label, L[3]->LI); - CS_InsertEntry (S, X, I+3); - - /* Generate the increment of the high byte */ - X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, L[3]->LI); - CS_InsertEntry (S, X, I+4); - - /* Delete the call to tosaddax */ - CS_DelEntry (S, I+5); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptAdd2 (CodeSeg* S) -/* Search for the sequence - * - * ldy #xx - * lda (sp),y - * tax - * dey - * lda (sp),y - * ldy #$yy - * jsr addeqysp - * - * and replace it by: - * - * ldy #xx-1 - * lda (sp),y - * ldy #yy - * clc - * adc (sp),y - * sta (sp),y - * ldy #xx - * lda (sp),y - * ldy #yy+1 - * adc (sp),y - * sta (sp),y - * - * provided that a/x is not used later. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[7]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDY && - CE_KnownImm (L[0]) && - CS_GetEntries (S, L+1, I+1, 6) && - L[1]->OPC == OP65_LDA && - L[1]->AM == AM65_ZP_INDY && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TAX && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_DEY && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_LDA && - L[4]->AM == AM65_ZP_INDY && - !CE_HasLabel (L[4]) && - L[5]->OPC == OP65_LDY && - CE_KnownImm (L[5]) && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_JSR && - strcmp (L[6]->Arg, "addeqysp") == 0 && - !CE_HasLabel (L[6]) && - (GetRegInfo (S, I+7) & REG_AX) == 0) { - - char Buf [20]; - CodeEntry* X; - int Offs; - - - /* Create a replacement for the first LDY */ - Offs = (int) (L[0]->Num - 1); - xsprintf (Buf, sizeof (Buf), "$%02X", Offs); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Buf, 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I); - - /* Load Y with the low offset of the target variable */ - X = NewCodeEntry (OP65_LDY, AM65_IMM, L[5]->Arg, 0, L[1]->LI); - CS_InsertEntry (S, X, I+2); - - /* Add the CLC */ - X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, L[1]->LI); - CS_InsertEntry (S, X, I+3); - - /* Remove the TAX/DEY sequence */ - CS_DelEntry (S, I+5); /* dey */ - CS_DelEntry (S, I+4); /* tax */ - - /* Addition of the low byte */ - X = NewCodeEntry (OP65_ADC, AM65_ZP_INDY, "sp", 0, L[4]->LI); - CS_InsertEntry (S, X, I+4); - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "sp", 0, L[4]->LI); - CS_InsertEntry (S, X, I+5); - - /* LDY */ - xsprintf (Buf, sizeof (Buf), "$%02X", (Offs+1)); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Buf, 0, L[4]->LI); - CS_InsertEntry (S, X, I+6); - - /* Addition of the high byte */ - xsprintf (Buf, sizeof (Buf), "$%02X", (int)(L[5]->Num+1)); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Buf, 0, L[5]->LI); - CS_InsertEntry (S, X, I+8); - X = NewCodeEntry (OP65_ADC, AM65_ZP_INDY, "sp", 0, L[6]->LI); - CS_InsertEntry (S, X, I+9); - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "sp", 0, L[6]->LI); - CS_InsertEntry (S, X, I+10); - - /* Delete the remaining stuff */ - CS_DelEntry (S, I+12); - CS_DelEntry (S, I+11); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptAdd3 (CodeSeg* S) -/* Search for the sequence - * - * adc ... - * bcc L - * inx - * L: - * - * and remove the handling of the high byte if X is not used later. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[3]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_ADC && - CS_GetEntries (S, L, I+1, 3) && - (L[0]->OPC == OP65_BCC || L[0]->OPC == OP65_JCC) && - L[0]->JumpTo != 0 && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_INX && - !CE_HasLabel (L[1]) && - L[0]->JumpTo->Owner == L[2] && - !RegXUsed (S, I+3)) { - - /* Remove the bcs/dex */ - CS_DelEntries (S, I+1, 2); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -/*****************************************************************************/ -/* Optimize shifts */ -/*****************************************************************************/ - - - -static unsigned OptShift1 (CodeSeg* S) -/* A call to the shlaxN routine may get replaced by one or more asl insns - * if the value of X is not used later. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_JSR && - (strncmp (E->Arg, "shlax", 5) == 0 || - strncmp (E->Arg, "aslax", 5) == 0) && - strlen (E->Arg) == 6 && - IsDigit (E->Arg[5]) && - !RegXUsed (S, I+1)) { - - /* Insert shift insns */ - unsigned Count = E->Arg[5] - '0'; - while (Count--) { - CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); - } - - /* Delete the call to shlax */ - CS_DelEntry (S, I); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -/*****************************************************************************/ -/* Optimizations for compares */ -/*****************************************************************************/ - - - -static unsigned OptCmp1 (CodeSeg* S) -/* Search for the sequence - * - * stx xx - * stx tmp1 - * ora tmp1 - * - * and replace it by - * - * stx xx - * ora xx - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_STX && - CS_GetEntries (S, L, I+1, 2) && - L[0]->OPC == OP65_STX && - strcmp (L[0]->Arg, "tmp1") == 0 && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_ORA && - strcmp (L[1]->Arg, "tmp1") == 0 && - !CE_HasLabel (L[1])) { - - /* Remove the remaining instructions */ - CS_DelEntries (S, I+1, 2); - - /* Insert the ora instead */ - CS_InsertEntry (S, NewCodeEntry (OP65_ORA, E->AM, E->Arg, 0, E->LI), I+1); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptCmp2 (CodeSeg* S) -/* Search for - * - * lda/and/ora/eor ... - * cmp #$00 - * jeq/jne - * - * and remove the cmp. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if ((E->OPC == OP65_ADC || - E->OPC == OP65_AND || - E->OPC == OP65_DEA || - E->OPC == OP65_EOR || - E->OPC == OP65_INA || - E->OPC == OP65_LDA || - E->OPC == OP65_ORA || - E->OPC == OP65_PLA || - E->OPC == OP65_SBC || - E->OPC == OP65_TXA || - E->OPC == OP65_TYA) && - CS_GetEntries (S, L, I+1, 2) && - IsCmpToZero (L[0]) && - !CE_HasLabel (L[0]) && - (L[1]->Info & OF_FBRA) != 0 && - !CE_HasLabel (L[1])) { - - /* Remove the compare */ - CS_DelEntry (S, I+1); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptCmp3 (CodeSeg* S) -/* Search for - * - * lda x - * ldx y - * cpx #a - * bne L1 - * cmp #b - * jne/jeq L2 - * - * If a is zero, we may remove the compare. If a and b are both zero, we may - * replace it by the sequence - * - * lda x - * ora x+1 - * jne/jeq ... - * - * L1 may be either the label at the branch instruction, or the target label - * of this instruction. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[5]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_LDA && - CS_GetEntries (S, L, I+1, 5) && - L[0]->OPC == OP65_LDX && - !CE_HasLabel (L[0]) && - IsImmCmp16 (S, L+1)) { - - if (L[1]->Num == 0 && L[3]->Num == 0) { - /* The value is zero, we may use the simple code version. */ - CE_ReplaceOPC (L[0], OP65_ORA); - CS_DelEntries (S, I+2, 3); - } else { - /* Move the lda instruction after the first branch. This will - * improve speed, since the load is delayed after the first - * test. - */ - CS_MoveEntry (S, I, I+4); - - /* We will replace the ldx/cpx by lda/cmp */ - CE_ReplaceOPC (L[0], OP65_LDA); - CE_ReplaceOPC (L[1], OP65_CMP); - - /* Beware: If the first LDA instruction had a label, we have - * to move this label to the top of the sequence again. - */ - if (CE_HasLabel (E)) { - CS_MoveLabels (S, E, L[0]); - } - - } - - ++Changes; - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptCmp4 (CodeSeg* S) -/* Optimize compares of local variables: - * - * ldy #o - * lda (sp),y - * tax - * dey - * lda (sp),y - * cpx #a - * bne L1 - * cmp #b - * jne/jeq L2 - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[9]; - - /* Check for the sequence */ - if (IsLocalLoad16 (S, I, L, 9) && IsImmCmp16 (S, L+5)) { - - if (L[5]->Num == 0 && L[7]->Num == 0) { - - /* The value is zero, we may use the simple code version: - * ldy #o - * lda (sp),y - * dey - * ora (sp),y - * jne/jeq ... - */ - CE_ReplaceOPC (L[4], OP65_ORA); - CS_DelEntries (S, I+5, 3); /* cpx/bne/cmp */ - CS_DelEntry (S, I+2); /* tax */ - - } else { - - /* Change the code to just use the A register. Move the load - * of the low byte after the first branch if possible: - * - * ldy #o - * lda (sp),y - * cmp #a - * bne L1 - * dey - * lda (sp),y - * cmp #b - * jne/jeq ... - */ - CS_DelEntry (S, I+2); /* tax */ - CE_ReplaceOPC (L[5], OP65_CMP); /* cpx -> cmp */ - CS_MoveEntry (S, I+4, I+2); /* cmp */ - CS_MoveEntry (S, I+5, I+3); /* bne */ - - } - - ++Changes; - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptCmp5 (CodeSeg* S) -/* Search for calls to compare subroutines followed by a conditional branch - * and replace them by cheaper versions, since the branch means that the - * boolean value returned by these routines is not needed (we may also check - * that explicitly, but for the current code generator it is always true). - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* N; - cmp_t Cond; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_JSR && - (Cond = FindTosCmpCond (E->Arg)) != CMP_INV && - (N = CS_GetNextEntry (S, I)) != 0 && - (N->Info & OF_ZBRA) != 0 && - !CE_HasLabel (N)) { - - /* The tos... functions will return a boolean value in a/x and - * the Z flag says if this value is zero or not. We will call - * a cheaper subroutine instead, one that does not return a - * boolean value but only valid flags. Note: jeq jumps if - * the condition is not met, jne jumps if the condition is met. - * Invert the code if we jump on condition not met. - */ - if (GetBranchCond (N->OPC) == BC_EQ) { - /* Jumps if condition false, invert condition */ - Cond = CmpInvertTab [Cond]; - } - - /* Replace the subroutine call. */ - E = NewCodeEntry (OP65_JSR, AM65_ABS, "tosicmp", 0, E->LI); - CS_InsertEntry (S, E, I+1); - CS_DelEntry (S, I); - - /* Replace the conditional branch */ - ReplaceCmp (S, I+1, Cond); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptCmp6 (CodeSeg* S) -/* Search for a sequence ldx/txa/branch and remove the txa if A is not - * used later. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if ((E->OPC == OP65_LDX) && - CS_GetEntries (S, L, I+1, 2) && - L[0]->OPC == OP65_TXA && - !CE_HasLabel (L[0]) && - (L[1]->Info & OF_FBRA) != 0 && - !CE_HasLabel (L[1]) && - !RegAUsed (S, I+3)) { - - /* Remove the txa */ - CS_DelEntry (S, I+1); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -/*****************************************************************************/ -/* Optimize tests */ -/*****************************************************************************/ - - - -static unsigned OptTest1 (CodeSeg* S) -/* On a sequence - * - * stx xxx - * ora xxx - * beq/bne ... - * - * if X is zero, the sequence may be changed to - * - * cmp #$00 - * beq/bne ... - * - * which may be optimized further by another step. - * - * If A is zero, the sequence may be changed to - * - * txa - * beq/bne ... - * - */ -{ - unsigned Changes = 0; - unsigned I; - - /* Generate register info for this step */ - CS_GenRegInfo (S); - - /* Walk over the entries */ - I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[3]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check if it's the sequence we're searching for */ - if (L[0]->OPC == OP65_STX && - CS_GetEntries (S, L+1, I+1, 2) && - !CE_HasLabel (L[1]) && - L[1]->OPC == OP65_ORA && - strcmp (L[0]->Arg, L[1]->Arg) == 0 && - !CE_HasLabel (L[2]) && - (L[2]->Info & OF_ZBRA) != 0) { - - /* Check if X is zero */ - if (L[0]->RI->In.RegX == 0) { - - /* Insert the compare */ - CodeEntry* N = NewCodeEntry (OP65_CMP, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, N, I+2); - - /* Remove the two other insns */ - CS_DelEntry (S, I+1); - CS_DelEntry (S, I); - - /* We had changes */ - ++Changes; - - /* Check if A is zero */ - } else if (L[1]->RI->In.RegA == 0) { - - /* Insert the txa */ - CodeEntry* N = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, L[1]->LI); - CS_InsertEntry (S, N, I+2); - - /* Remove the two other insns */ - CS_DelEntry (S, I+1); - CS_DelEntry (S, I); - - /* We had changes */ - ++Changes; - } - } - - /* Next entry */ - ++I; - - } - - /* Free register info */ - CS_FreeRegInfo (S); - - /* Return the number of changes made */ - return Changes; -} - - - -/*****************************************************************************/ -/* nega optimizations */ -/*****************************************************************************/ - - - -static unsigned OptNegA1 (CodeSeg* S) -/* Check for - * - * ldx #$00 - * lda .. - * jsr bnega - * - * Remove the ldx if the lda does not use it. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for a ldx */ - if (E->OPC == OP65_LDX && - E->AM == AM65_IMM && - (E->Flags & CEF_NUMARG) != 0 && - E->Num == 0 && - CS_GetEntries (S, L, I+1, 2) && - L[0]->OPC == OP65_LDA && - (L[0]->Use & REG_X) == 0 && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_JSR && - strcmp (L[1]->Arg, "bnega") == 0 && - !CE_HasLabel (L[1])) { - - /* Remove the ldx instruction */ - CS_DelEntry (S, I); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptNegA2 (CodeSeg* S) -/* Check for - * - * lda .. - * jsr bnega - * jeq/jne .. - * - * Adjust the conditional branch and remove the call to the subroutine. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[2]; - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if ((E->OPC == OP65_ADC || - E->OPC == OP65_AND || - E->OPC == OP65_DEA || - E->OPC == OP65_EOR || - E->OPC == OP65_INA || - E->OPC == OP65_LDA || - E->OPC == OP65_ORA || - E->OPC == OP65_PLA || - E->OPC == OP65_SBC || - E->OPC == OP65_TXA || - E->OPC == OP65_TYA) && - CS_GetEntries (S, L, I+1, 2) && - L[0]->OPC == OP65_JSR && - strcmp (L[0]->Arg, "bnega") == 0 && - !CE_HasLabel (L[0]) && - (L[1]->Info & OF_ZBRA) != 0 && - !CE_HasLabel (L[1])) { - - /* Invert the branch */ - CE_ReplaceOPC (L[1], GetInverseBranch (L[1]->OPC)); - - /* Delete the subroutine call */ - CS_DelEntry (S, I+1); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -/*****************************************************************************/ -/* negax optimizations */ -/*****************************************************************************/ - - - -static unsigned OptNegAX1 (CodeSeg* S) -/* On a call to bnegax, if X is zero, the result depends only on the value in - * A, so change the call to a call to bnega. This will get further optimized - * later if possible. - */ -{ - unsigned Changes = 0; - unsigned I; - - /* Generate register info for this step */ - CS_GenRegInfo (S); - - /* Walk over the entries */ - I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check if this is a call to bnegax, and if X is known and zero */ - if (E->OPC == OP65_JSR && - E->RI->In.RegX == 0 && - strcmp (E->Arg, "bnegax") == 0) { - - /* We're cheating somewhat here ... */ - E->Arg[5] = '\0'; - E->Use &= ~REG_X; - - /* We had changes */ - ++Changes; - } - - /* Next entry */ - ++I; - - } - - /* Free register info */ - CS_FreeRegInfo (S); - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptNegAX2 (CodeSeg* S) +static unsigned OptPtrLoad1 (CodeSeg* S) /* Search for the sequence: * - * lda (xx),y - * tax - * dey - * lda (xx),y - * jsr bnegax - * jne/jeq ... + * clc + * adc xxx + * tay + * txa + * adc yyy + * tax + * tya + * ldy + * jsr ldauidx * - * and replace it by + * and replace it by: * - * lda (xx),y - * dey - * ora (xx),y - * jeq/jne ... + * clc + * adc xxx + * sta ptr1 + * txa + * adc yyy + * sta ptr1+1 + * ldy + * ldx #$00 + * lda (ptr1),y */ { unsigned Changes = 0; @@ -1540,100 +724,69 @@ static unsigned OptNegAX2 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[5]; + CodeEntry* L[9]; /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (E->OPC == OP65_LDA && - E->AM == AM65_ZP_INDY && - CS_GetEntries (S, L, I+1, 5) && - L[0]->OPC == OP65_TAX && - L[1]->OPC == OP65_DEY && - L[2]->OPC == OP65_LDA && - L[2]->AM == AM65_ZP_INDY && - strcmp (L[2]->Arg, E->Arg) == 0 && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_JSR && - strcmp (L[3]->Arg, "bnegax") == 0 && - !CE_HasLabel (L[3]) && - (L[4]->Info & OF_ZBRA) != 0 && - !CE_HasLabel (L[4])) { - - /* lda --> ora */ - CE_ReplaceOPC (L[2], OP65_ORA); - - /* Invert the branch */ - CE_ReplaceOPC (L[4], GetInverseBranch (L[4]->OPC)); - - /* Delete the entries no longer needed. Beware: Deleting entries - * will change the indices. - */ - CS_DelEntry (S, I+4); /* jsr bnegax */ - CS_DelEntry (S, I+1); /* tax */ - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} + if (L[0]->OPC == OP65_CLC && + CS_GetEntries (S, L+1, I+1, 8) && + L[1]->OPC == OP65_ADC && + L[2]->OPC == OP65_TAY && + L[3]->OPC == OP65_TXA && + L[4]->OPC == OP65_ADC && + L[5]->OPC == OP65_TAX && + L[6]->OPC == OP65_TYA && + L[7]->OPC == OP65_LDY && + CE_IsCallTo (L[8], "ldauidx") && + !CS_RangeHasLabel (S, I+1, 8)) { + CodeEntry* X; + CodeEntry* P; + /* Track the insertion point */ + unsigned IP = I+2; -static unsigned OptNegAX3 (CodeSeg* S) -/* Search for the sequence: - * - * lda xx - * ldx yy - * jsr bnegax - * jne/jeq ... - * - * and replace it by - * - * lda xx - * ora xx+1 - * jeq/jne ... - */ -{ - unsigned Changes = 0; + /* sta ptr1 */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[2]->LI); + CS_InsertEntry (S, X, IP++); - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { + /* If the instruction before the clc is a ldx, replace the + * txa by an lda with the same location of the ldx. Otherwise + * transfer the value in X to A. + */ + if ((P = CS_GetPrevEntry (S, I)) != 0 && + P->OPC == OP65_LDX && + !CE_HasLabel (P)) { + X = NewCodeEntry (OP65_LDA, P->AM, P->Arg, 0, P->LI); + } else { + X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, L[3]->LI); + } + CS_InsertEntry (S, X, IP++); - CodeEntry* L[3]; + /* adc yyy */ + X = NewCodeEntry (OP65_ADC, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, IP++); - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + /* sta ptr1+1 */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[5]->LI); + CS_InsertEntry (S, X, IP++); - /* Check for the sequence */ - if (E->OPC == OP65_LDA && - CS_GetEntries (S, L, I+1, 3) && - L[0]->OPC == OP65_LDX && - !CE_HasLabel (L[0]) && - L[1]->OPC == OP65_JSR && - strcmp (L[1]->Arg, "bnegax") == 0 && - !CE_HasLabel (L[1]) && - (L[2]->Info & OF_ZBRA) != 0 && - !CE_HasLabel (L[2])) { + /* ldy ... */ + X = NewCodeEntry (OP65_LDY, L[7]->AM, L[7]->Arg, 0, L[7]->LI); + CS_InsertEntry (S, X, IP++); - /* ldx --> ora */ - CE_ReplaceOPC (L[0], OP65_ORA); + /* ldx #$00 */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[8]->LI); + CS_InsertEntry (S, X, IP++); - /* Invert the branch */ - CE_ReplaceOPC (L[2], GetInverseBranch (L[2]->OPC)); + /* lda (ptr1),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[8]->LI); + CS_InsertEntry (S, X, IP++); - /* Delete the subroutine call */ - CS_DelEntry (S, I+2); + /* Remove the old instructions */ + CS_DelEntries (S, IP, 7); /* Remember, we had changes */ ++Changes; @@ -1651,18 +804,30 @@ static unsigned OptNegAX3 (CodeSeg* S) -static unsigned OptNegAX4 (CodeSeg* S) +static unsigned OptPtrLoad2 (CodeSeg* S) /* Search for the sequence: * - * jsr xxx - * jsr bnega(x) - * jeq/jne ... + * adc xxx + * pha + * txa + * iny + * adc yyy + * tax + * pla + * ldy + * jsr ldauidx * * and replace it by: * - * jsr xxx - * - * jne/jeq ... + * adc xxx + * sta ptr1 + * txa + * iny + * adc yyy + * sta ptr1+1 + * ldy + * ldx #$00 + * lda (ptr1),y */ { unsigned Changes = 0; @@ -1671,51 +836,53 @@ static unsigned OptNegAX4 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[2]; + CodeEntry* L[9]; /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); + L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (E->OPC == OP65_JSR && - CS_GetEntries (S, L, I+1, 2) && - L[0]->OPC == OP65_JSR && - strncmp (L[0]->Arg,"bnega",5) == 0 && - !CE_HasLabel (L[0]) && - (L[1]->Info & OF_ZBRA) != 0 && - !CE_HasLabel (L[1])) { + if (L[0]->OPC == OP65_ADC && + CS_GetEntries (S, L+1, I+1, 8) && + L[1]->OPC == OP65_PHA && + L[2]->OPC == OP65_TXA && + L[3]->OPC == OP65_INY && + L[4]->OPC == OP65_ADC && + L[5]->OPC == OP65_TAX && + L[6]->OPC == OP65_PLA && + L[7]->OPC == OP65_LDY && + CE_IsCallTo (L[8], "ldauidx") && + !CS_RangeHasLabel (S, I+1, 8)) { - CodeEntry* X; + CodeEntry* X; - /* Check if we're calling bnega or bnegax */ - int ByteSized = (strcmp (L[0]->Arg, "bnega") == 0); + /* Store the low byte and remove the PHA instead */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+1); - /* Insert apropriate test code */ - if (ByteSized) { - /* Test bytes */ - X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - } else { - /* Test words */ - X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+3); - } + /* Store the high byte */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI); + CS_InsertEntry (S, X, I+6); - /* Delete the subroutine call */ - CS_DelEntry (S, I+1); + /* Load high and low byte */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI); + CS_InsertEntry (S, X, I+10); + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI); + CS_InsertEntry (S, X, I+11); - /* Invert the branch */ - CE_ReplaceOPC (L[1], GetInverseBranch (L[1]->OPC)); + /* Delete the old code */ + CS_DelEntry (S, I+12); /* jsr ldauidx */ + CS_DelEntry (S, I+8); /* pla */ + CS_DelEntry (S, I+7); /* tax */ + CS_DelEntry (S, I+2); /* pha */ - /* Remember, we had changes */ - ++Changes; + /* Remember, we had changes */ + ++Changes; - } + } - /* Next entry */ - ++I; + /* Next entry */ + ++I; } @@ -1725,70 +892,23 @@ static unsigned OptNegAX4 (CodeSeg* S) -/*****************************************************************************/ -/* Optimize stores through pointers */ -/*****************************************************************************/ - - - -static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L) -/* Check if this is one of the allowed suboperation for OptPtrStore1 */ -{ - /* Check for a label attached to the entry */ - if (CE_HasLabel (L[0])) { - return 0; - } - - /* Check for single insn sub ops */ - if (L[0]->OPC == OP65_AND || - L[0]->OPC == OP65_EOR || - L[0]->OPC == OP65_ORA || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) { - - /* One insn */ - return 1; - - } else if (L[0]->OPC == OP65_CLC && - (L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_ADC && - !CE_HasLabel (L[1])) { - return 2; - } else if (L[0]->OPC == OP65_SEC && - (L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_SBC && - !CE_HasLabel (L[1])) { - return 2; - } - - - - /* Not found */ - return 0; -} - - - -static unsigned OptPtrStore1 (CodeSeg* S) +static unsigned OptPtrLoad3 (CodeSeg* S) /* Search for the sequence: * - * jsr pushax - * ldy xxx + * lda #<(label+0) + * ldx #>(label+0) + * clc + * adc xxx + * bcc L + * inx + * L: ldy #$00 * jsr ldauidx - * subop - * ldy yyy - * jsr staspidx * * and replace it by: * - * sta ptr1 - * stx ptr1+1 * ldy xxx * ldx #$00 - * lda (ptr1),y - * subop - * ldy yyy - * sta (ptr1),y + * lda label,y */ { unsigned Changes = 0; @@ -1797,58 +917,59 @@ static unsigned OptPtrStore1 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - unsigned K; - CodeEntry* L[10]; + CodeEntry* L[8]; + unsigned Len; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_JSR && - strcmp (L[0]->Arg, "pushax") == 0 && - CS_GetEntries (S, L+1, I+1, 3) && - L[1]->OPC == OP65_LDY && - CE_KnownImm (L[1]) && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_JSR && - strcmp (L[2]->Arg, "ldauidx") == 0 && - !CE_HasLabel (L[2]) && - (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 && - CS_GetEntries (S, L+3+K, I+3+K, 2) && - L[3+K]->OPC == OP65_LDY && - CE_KnownImm (L[3+K]) && - !CE_HasLabel (L[3+K]) && - L[4+K]->OPC == OP65_JSR && - strcmp (L[4+K]->Arg, "staspidx") == 0 && - !CE_HasLabel (L[4+K])) { + if (L[0]->OPC == OP65_LDA && + L[0]->AM == AM65_IMM && + CS_GetEntries (S, L+1, I+1, 7) && + L[1]->OPC == OP65_LDX && + L[1]->AM == AM65_IMM && + L[2]->OPC == OP65_CLC && + L[3]->OPC == OP65_ADC && + (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && + (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) && + L[4]->JumpTo != 0 && + L[4]->JumpTo->Owner == L[6] && + L[5]->OPC == OP65_INX && + L[6]->OPC == OP65_LDY && + CE_KnownImm (L[6]) && + L[6]->Num == 0 && + CE_IsCallTo (L[7], "ldauidx") && + !CS_RangeHasLabel (S, I+1, 5) && + !CE_HasLabel (L[7]) && + /* Check the label last because this is quite costly */ + (Len = strlen (L[0]->Arg)) > 3 && + L[0]->Arg[0] == '<' && + L[0]->Arg[1] == '(' && + strlen (L[1]->Arg) == Len && + L[1]->Arg[0] == '>' && + memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { CodeEntry* X; + char* Label; - /* Create and insert the stores */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - - /* Delete the call to pushax */ - CS_DelEntry (S, I); - - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+3); + /* We will create all the new stuff behind the current one so + * we keep the line references. + */ + X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+8); - /* Insert the load from ptr1 */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[2]->LI); - CS_InsertEntry (S, X, I+4); + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); + CS_InsertEntry (S, X, I+9); - /* Insert the store through ptr1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+6+K); + Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); + Label[Len-3] = '\0'; + X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); + CS_InsertEntry (S, X, I+10); + xfree (Label); - /* Delete the call to staspidx */ - CS_DelEntry (S, I+7+K); + /* Remove the old code */ + CS_DelEntries (S, I, 8); /* Remember, we had changes */ ++Changes; @@ -1866,21 +987,26 @@ static unsigned OptPtrStore1 (CodeSeg* S) -static unsigned OptPtrStore2 (CodeSeg* S) +static unsigned OptPtrLoad4 (CodeSeg* S) /* Search for the sequence: * - * jsr pushax - * lda xxx - * ldy yyy - * jsr staspidx + * lda #<(label+0) + * ldx #>(label+0) + * ldy #$xx + * clc + * adc (sp),y + * bcc L + * inx + * L: ldy #$00 + * jsr ldauidx * * and replace it by: * - * sta ptr1 - * stx ptr1+1 - * lda xxx - * ldy yyy - * sta (ptr1),y + * ldy #$xx + * lda (sp),y + * tay + * ldx #$00 + * lda label,y */ { unsigned Changes = 0; @@ -1889,41 +1015,71 @@ static unsigned OptPtrStore2 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[4]; + CodeEntry* L[9]; + unsigned Len; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_JSR && - strcmp (L[0]->Arg, "pushax") == 0 && - CS_GetEntries (S, L+1, I+1, 3) && - L[1]->OPC == OP65_LDA && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDY && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_JSR && - strcmp (L[3]->Arg, "staspidx") == 0 && - !CE_HasLabel (L[3])) { + if (L[0]->OPC == OP65_LDA && + L[0]->AM == AM65_IMM && + CS_GetEntries (S, L+1, I+1, 8) && + L[1]->OPC == OP65_LDX && + L[1]->AM == AM65_IMM && + !CE_HasLabel (L[1]) && + L[2]->OPC == OP65_LDY && + CE_KnownImm (L[2]) && + !CE_HasLabel (L[2]) && + L[3]->OPC == OP65_CLC && + !CE_HasLabel (L[3]) && + L[4]->OPC == OP65_ADC && + L[4]->AM == AM65_ZP_INDY && + !CE_HasLabel (L[4]) && + (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) && + L[5]->JumpTo != 0 && + L[5]->JumpTo->Owner == L[7] && + !CE_HasLabel (L[5]) && + L[6]->OPC == OP65_INX && + !CE_HasLabel (L[6]) && + L[7]->OPC == OP65_LDY && + CE_KnownImm (L[7]) && + L[7]->Num == 0 && + CE_IsCallTo (L[8], "ldauidx") && + !CE_HasLabel (L[8]) && + /* Check the label last because this is quite costly */ + (Len = strlen (L[0]->Arg)) > 3 && + L[0]->Arg[0] == '<' && + L[0]->Arg[1] == '(' && + strlen (L[1]->Arg) == Len && + L[1]->Arg[0] == '>' && + memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { CodeEntry* X; + char* Label; - /* Create and insert the stores */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); + /* Add the lda */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[4]->Arg, 0, L[0]->LI); + CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); + /* Add the tay */ + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[0]->LI); + CS_InsertEntry (S, X, I+4); - /* Delete the call to pushax */ - CS_DelEntry (S, I); + /* Add the ldx */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); + CS_InsertEntry (S, X, I+5); - /* Insert the store through ptr1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+4); + /* Add the lda */ + Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); + Label[Len-3] = '\0'; + X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); + CS_InsertEntry (S, X, I+6); + xfree (Label); - /* Delete the call to staspidx */ - CS_DelEntry (S, I+5); + /* Remove the old code */ + CS_DelEntries (S, I, 2); + CS_DelEntries (S, I+5, 6); /* Remember, we had changes */ ++Changes; @@ -1941,30 +1097,36 @@ static unsigned OptPtrStore2 (CodeSeg* S) -/*****************************************************************************/ -/* Optimize loads through pointers */ -/*****************************************************************************/ - - - -static unsigned OptPtrLoad1 (CodeSeg* S) +static unsigned OptPtrLoad5 (CodeSeg* S) /* Search for the sequence: * - * tax - * dey - * lda (sp),y # May be any destination - * ldy ... - * jsr ldauidx - * - * and replace it by: - * - * sta ptr1+1 - * dey - * lda (sp),y - * sta ptr1 - * ldy ... + * lda regbank+n + * ldx regbank+n+1 + * sta regsave + * stx regsave+1 + * clc + * adc #$01 + * bcc L0005 + * inx + * L: sta regbank+n + * stx regbank+n+1 + * lda regsave + * ldx regsave+1 + * ldy #$00 + * jsr ldauidx + * + * and replace it by: + * + * ldy #$00 * ldx #$00 - * lda (ptr1),y + * lda (regbank+n),y + * inc regbank+n + * bne L1 + * inc regbank+n+1 + * L1: tay <- only if flags are used + * + * This function must execute before OptPtrLoad5! + * */ { unsigned Changes = 0; @@ -1973,43 +1135,97 @@ static unsigned OptPtrLoad1 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[5]; + CodeEntry* L[15]; + unsigned Len; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_TAX && - CS_GetEntries (S, L+1, I+1, 4) && - L[1]->OPC == OP65_DEY && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_LDY && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_JSR && - strcmp (L[4]->Arg, "ldauidx") == 0 && - !CE_HasLabel (L[4])) { + if (L[0]->OPC == OP65_LDA && + L[0]->AM == AM65_ZP && + strncmp (L[0]->Arg, "regbank+", 8) == 0 && + (Len = strlen (L[0]->Arg)) > 0 && + CS_GetEntries (S, L+1, I+1, 14) && + !CS_RangeHasLabel (S, I+1, 7) && + !CS_RangeHasLabel (S, I+9, 5) && + L[1]->OPC == OP65_LDX && + L[1]->AM == AM65_ZP && + strncmp (L[1]->Arg, L[0]->Arg, Len) == 0 && + strcmp (L[1]->Arg+Len, "+1") == 0 && + L[2]->OPC == OP65_STA && + L[2]->AM == AM65_ZP && + strcmp (L[2]->Arg, "regsave") == 0 && + L[3]->OPC == OP65_STX && + L[3]->AM == AM65_ZP && + strcmp (L[3]->Arg, "regsave+1") == 0 && + L[4]->OPC == OP65_CLC && + L[5]->OPC == OP65_ADC && + CE_KnownImm (L[5]) && + L[5]->Num == 1 && + L[6]->OPC == OP65_BCC && + L[6]->JumpTo != 0 && + L[6]->JumpTo->Owner == L[8] && + L[7]->OPC == OP65_INX && + L[8]->OPC == OP65_STA && + L[8]->AM == AM65_ZP && + strcmp (L[8]->Arg, L[0]->Arg) == 0 && + L[9]->OPC == OP65_STX && + L[9]->AM == AM65_ZP && + strcmp (L[9]->Arg, L[1]->Arg) == 0 && + L[10]->OPC == OP65_LDA && + L[10]->AM == AM65_ZP && + strcmp (L[10]->Arg, "regsave") == 0 && + L[11]->OPC == OP65_LDX && + L[11]->AM == AM65_ZP && + strcmp (L[11]->Arg, "regsave+1") == 0 && + L[12]->OPC == OP65_LDY && + CE_KnownImm (L[12]) && + CE_IsCallTo (L[13], "ldauidx")) { CodeEntry* X; + CodeLabel* Label; - /* Store the high byte and remove the TAX instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I); + /* Check if the instruction following the sequence uses the flags + * set by the load. If so, insert a test of the value in the + * accumulator. + */ + if (CE_UseLoadFlags (L[14])) { + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[13]->LI); + CS_InsertEntry (S, X, I+14); + } - /* Store the low byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[2]->LI); - CS_InsertEntry (S, X, I+3); + /* Attach a label to L[14]. This may be either the just inserted + * instruction, or the one following the sequence. + */ + Label = CS_GenLabel (S, L[14]); - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+5); + /* ldy #$xx */ + X = NewCodeEntry (OP65_LDY, AM65_IMM, L[12]->Arg, 0, L[12]->LI); + CS_InsertEntry (S, X, I+14); - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+5); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI); - CS_InsertEntry (S, X, I+6); + /* ldx #$xx */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[13]->LI); + CS_InsertEntry (S, X, I+15); + + /* lda (regbank+n),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[13]->LI); + CS_InsertEntry (S, X, I+16); + + /* inc regbank+n */ + X = NewCodeEntry (OP65_INC, AM65_ZP, L[0]->Arg, 0, L[5]->LI); + CS_InsertEntry (S, X, I+17); + + /* bne ... */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, Label->Name, Label, L[6]->LI); + CS_InsertEntry (S, X, I+18); + + /* inc regbank+n+1 */ + X = NewCodeEntry (OP65_INC, AM65_ZP, L[1]->Arg, 0, L[7]->LI); + CS_InsertEntry (S, X, I+19); + + /* Delete the old code */ + CS_DelEntries (S, I, 14); /* Remember, we had changes */ ++Changes; @@ -2027,28 +1243,19 @@ static unsigned OptPtrLoad1 (CodeSeg* S) -static unsigned OptPtrLoad2 (CodeSeg* S) +static unsigned OptPtrLoad6 (CodeSeg* S) /* Search for the sequence: * - * adc xxx - * tay - * txa - * adc yyy - * tax - * tya - * ldy - * jsr ldauidx + * lda zp + * ldx zp+1 + * ldy xx + * jsr ldauidx * * and replace it by: * - * adc xxx - * sta ptr1 - * txa - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 - * lda (ptr1),y + * ldy xx + * ldx #$00 + * lda (zp),y */ { unsigned Changes = 0; @@ -2057,53 +1264,36 @@ static unsigned OptPtrLoad2 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[8]; + CodeEntry* L[4]; + unsigned Len; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_ADC && - CS_GetEntries (S, L+1, I+1, 7) && - L[1]->OPC == OP65_TAY && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TXA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_ADC && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_TAX && - !CE_HasLabel (L[4]) && - L[5]->OPC == OP65_TYA && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_LDY && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_JSR && - strcmp (L[7]->Arg, "ldauidx") == 0 && - !CE_HasLabel (L[7])) { + if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_ZP && + CS_GetEntries (S, L+1, I+1, 3) && + !CS_RangeHasLabel (S, I+1, 3) && + L[1]->OPC == OP65_LDX && L[1]->AM == AM65_ZP && + (Len = strlen (L[0]->Arg)) > 0 && + strncmp (L[0]->Arg, L[1]->Arg, Len) == 0 && + strcmp (L[1]->Arg + Len, "+1") == 0 && + L[2]->OPC == OP65_LDY && + CE_IsCallTo (L[3], "ldauidx")) { CodeEntry* X; - /* Store the low byte and remove the TAY instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I+2); + /* ldx #$00 */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); + CS_InsertEntry (S, X, I+3); - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[3]->LI); + /* lda (zp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); CS_InsertEntry (S, X, I+4); - /* Delete more transfer insns */ - CS_DelEntry (S, I+6); + /* Remove the old code */ CS_DelEntry (S, I+5); - - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+6); - - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI); - CS_InsertEntry (S, X, I+6); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI); - CS_InsertEntry (S, X, I+7); + CS_DelEntries (S, I, 2); /* Remember, we had changes */ ++Changes; @@ -2121,30 +1311,21 @@ static unsigned OptPtrLoad2 (CodeSeg* S) -static unsigned OptPtrLoad3 (CodeSeg* S) +static unsigned OptPtrLoad7 (CodeSeg* S) /* Search for the sequence: * - * adc xxx - * pha - * txa - * iny - * adc yyy - * tax - * pla - * ldy - * jsr ldauidx + * lda zp + * ldx zp+1 + * ldy xx + * jsr ldaxidx * * and replace it by: * - * adc xxx - * sta ptr1 - * txa - * iny - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 - * lda (ptr1),y + * ldy xx + * lda (zp),y + * tax + * dey + * lda (zp),y */ { unsigned Changes = 0; @@ -2153,55 +1334,44 @@ static unsigned OptPtrLoad3 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[9]; + CodeEntry* L[4]; + unsigned Len; /* Get next entry */ L[0] = CS_GetEntry (S, I); /* Check for the sequence */ - if (L[0]->OPC == OP65_ADC && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_PHA && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_TXA && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_INY && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_ADC && - !CE_HasLabel (L[4]) && - L[5]->OPC == OP65_TAX && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_PLA && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_LDY && - !CE_HasLabel (L[7]) && - L[8]->OPC == OP65_JSR && - strcmp (L[8]->Arg, "ldauidx") == 0 && - !CE_HasLabel (L[8])) { + if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_ZP && + CS_GetEntries (S, L+1, I+1, 3) && + !CS_RangeHasLabel (S, I+1, 3) && + L[1]->OPC == OP65_LDX && L[1]->AM == AM65_ZP && + (Len = strlen (L[0]->Arg)) > 0 && + strncmp (L[0]->Arg, L[1]->Arg, Len) == 0 && + strcmp (L[1]->Arg + Len, "+1") == 0 && + L[2]->OPC == OP65_LDY && + CE_IsCallTo (L[3], "ldaxidx")) { CodeEntry* X; - /* Store the low byte and remove the PHA instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I+2); - - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI); - CS_InsertEntry (S, X, I+5); + /* lda (zp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); + CS_InsertEntry (S, X, I+4); - /* Delete more transfer and PLA insns */ - CS_DelEntry (S, I+7); - CS_DelEntry (S, I+6); + /* tax */ + X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[3]->LI); + CS_InsertEntry (S, X, I+5); - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+7); + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[3]->LI); + CS_InsertEntry (S, X, I+6); - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI); + /* lda (zp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); CS_InsertEntry (S, X, I+7); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI); - CS_InsertEntry (S, X, I+8); + + /* Remove the old code */ + CS_DelEntry (S, I+3); + CS_DelEntries (S, I, 2); /* Remember, we had changes */ ++Changes; @@ -2219,7 +1389,7 @@ static unsigned OptPtrLoad3 (CodeSeg* S) -static unsigned OptPtrLoad4 (CodeSeg* S) +static unsigned OptPtrLoad8 (CodeSeg* S) /* Search for the sequence * * ldy ... @@ -2233,7 +1403,7 @@ static unsigned OptPtrLoad4 (CodeSeg* S) * ldx #$00 * lda (ptr1),y * - * This step must be execute *after* OptPtrLoad1! + * This step must be executed *after* OptPtrLoad1! */ { unsigned Changes = 0; @@ -2242,7 +1412,7 @@ static unsigned OptPtrLoad4 (CodeSeg* S) unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[2]; + CodeEntry* L[2]; /* Get next entry */ L[0] = CS_GetEntry (S, I); @@ -2250,24 +1420,23 @@ static unsigned OptPtrLoad4 (CodeSeg* S) /* Check for the sequence */ if (L[0]->OPC == OP65_LDY && CS_GetEntries (S, L+1, I+1, 1) && - L[1]->OPC == OP65_JSR && - strcmp (L[1]->Arg, "ldauidx") == 0 && - !CE_HasLabel (L[1])) { + CE_IsCallTo (L[1], "ldauidx") && + !CE_HasLabel (L[1])) { - CodeEntry* X; + CodeEntry* X; /* Store the high byte */ X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); + CS_InsertEntry (S, X, I+1); - /* Store the low byte */ - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); + /* Store the low byte */ + X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); + CS_InsertEntry (S, X, I+2); - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+3); + /* Delete the call to ldauidx */ + CS_DelEntry (S, I+3); - /* Load the high and low byte */ + /* Load the high and low byte */ X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); CS_InsertEntry (S, X, I+3); X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[0]->LI); @@ -2290,113 +1459,443 @@ static unsigned OptPtrLoad4 (CodeSeg* S) /*****************************************************************************/ -/* Code */ +/* Decouple operations */ /*****************************************************************************/ -/* Types of optimization steps */ -enum { - optPre, /* Repeated once */ - optPreMain, /* Repeated more than once */ - optMain, /* dito */ - optPostMain, /* dito */ - optPost /* Repeated once */ -}; +static unsigned OptDecouple (CodeSeg* S) +/* Decouple operations, that is, do the following replacements: + * + * dex -> ldx #imm + * inx -> ldx #imm + * dey -> ldy #imm + * iny -> ldy #imm + * tax -> ldx #imm + * txa -> lda #imm + * tay -> ldy #imm + * tya -> lda #imm + * lda zp -> lda #imm + * ldx zp -> ldx #imm + * ldy zp -> ldy #imm + * + * Provided that the register values are known of course. + */ +{ + unsigned Changes = 0; + unsigned I; + + /* Generate register info for the following step */ + CS_GenRegInfo (S); + + /* Walk over the entries */ + I = 0; + while (I < CS_GetEntryCount (S)) { + + const char* Arg; + + /* Get next entry and it's input register values */ + CodeEntry* E = CS_GetEntry (S, I); + const RegContents* In = &E->RI->In; + + /* Assume we have no replacement */ + CodeEntry* X = 0; + + /* Check the instruction */ + switch (E->OPC) { + + case OP65_DEA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA - 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_DEX: + if (RegValIsKnown (In->RegX)) { + Arg = MakeHexArg ((In->RegX - 1) & 0xFF); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_DEY: + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY - 1) & 0xFF); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_INA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA + 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_INX: + if (RegValIsKnown (In->RegX)) { + Arg = MakeHexArg ((In->RegX + 1) & 0xFF); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_INY: + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY + 1) & 0xFF); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_LDA: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; + + case OP65_LDX: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; + + case OP65_LDY: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; + + case OP65_TAX: + if (E->RI->In.RegA >= 0) { + Arg = MakeHexArg (In->RegA); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_TAY: + if (E->RI->In.RegA >= 0) { + Arg = MakeHexArg (In->RegA); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_TXA: + if (E->RI->In.RegX >= 0) { + Arg = MakeHexArg (In->RegX); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_TYA: + if (E->RI->In.RegY >= 0) { + Arg = MakeHexArg (In->RegY); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + + default: + /* Avoid gcc warnings */ + break; + + } + + /* Insert the replacement if we have one */ + if (X) { + CS_InsertEntry (S, X, I+1); + CS_DelEntry (S, I); + ++Changes; + } + + /* Next entry */ + ++I; + + } + + /* Free register info */ + CS_FreeRegInfo (S); + + /* Return the number of changes made */ + return Changes; +} + + + +/*****************************************************************************/ +/* struct OptFunc */ +/*****************************************************************************/ + + -/* Table with all the optimization functions */ typedef struct OptFunc OptFunc; struct OptFunc { - unsigned (*Func) (CodeSeg*); /* Optimizer function */ - const char* Name; /* Name of optimizer step */ - unsigned char Type; /* Type of this step */ - char Disabled; /* True if pass disabled */ + unsigned (*Func) (CodeSeg*); /* Optimizer function */ + const char* Name; /* Name of the function/group */ + unsigned CodeSizeFactor; /* Code size factor for this opt func */ + unsigned long TotalRuns; /* Total number of runs */ + unsigned long LastRuns; /* Last number of runs */ + unsigned long TotalChanges; /* Total number of changes */ + unsigned long LastChanges; /* Last number of changes */ + char Disabled; /* True if function disabled */ }; -/* Macro that builds a table entry */ -#define OptEntry(func,type) { func, #func, type, 0 } - -/* Table with optimizer steps */ -static OptFunc OptFuncs [] = { - /* Optimizes stores through pointers */ - OptEntry (OptPtrStore1, optPre), - OptEntry (OptPtrStore2, optPre), - /* Optimize loads through pointers */ - OptEntry (OptPtrLoad1, optPre), - OptEntry (OptPtrLoad2, optPre), - OptEntry (OptPtrLoad3, optPre), - OptEntry (OptPtrLoad4, optMain), - /* Optimize calls to nega */ - OptEntry (OptNegA1, optMain), - OptEntry (OptNegA2, optMain), - /* Optimize calls to negax */ - OptEntry (OptNegAX1, optPre), - OptEntry (OptNegAX2, optPre), - OptEntry (OptNegAX3, optPre), - OptEntry (OptNegAX4, optPre), - /* Optimize subtractions */ - OptEntry (OptSub1, optMain), - OptEntry (OptSub2, optMain), - /* Optimize additions */ - OptEntry (OptAdd1, optPre), - OptEntry (OptAdd2, optPre), - OptEntry (OptAdd3, optMain), - /* Optimize shifts */ - OptEntry (OptShift1, optPre), - /* Optimize jump cascades */ - OptEntry (OptJumpCascades, optMain), - /* Remove dead jumps */ - OptEntry (OptDeadJumps, optMain), - /* Change jsr/rts to jmp */ - OptEntry (OptRTS, optMain), - /* Remove dead code */ - OptEntry (OptDeadCode, optMain), - /* Optimize jump targets */ - OptEntry (OptJumpTarget, optMain), - /* Optimize conditional branches */ - OptEntry (OptCondBranches, optMain), - /* Replace jumps to RTS by RTS */ - OptEntry (OptRTSJumps, optMain), - /* Remove calls to the bool transformer subroutines */ - OptEntry (OptBoolTransforms, optMain), - /* Optimize compares */ - OptEntry (OptCmp1, optMain), - OptEntry (OptCmp2, optMain), - OptEntry (OptCmp3, optMain), - OptEntry (OptCmp4, optMain), - OptEntry (OptCmp5, optMain), - OptEntry (OptCmp6, optMain), - /* Optimize tests */ - OptEntry (OptTest1, optMain), - /* Remove unused loads */ - OptEntry (OptUnusedLoads, optMain), - OptEntry (OptDuplicateLoads, optMain), - OptEntry (OptStoreLoad, optMain), - OptEntry (OptTransfers, optMain), - /* Optimize branch distance */ - OptEntry (OptBranchDist, optPost), + + +/*****************************************************************************/ +/* Code */ +/*****************************************************************************/ + + + +/* A list of all the function descriptions */ +static OptFunc DOpt65C02BitOps = { Opt65C02BitOps, "Opt65C02BitOps", 66, 0, 0, 0, 0, 0 }; +static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOpt65C02Stores = { Opt65C02Stores, "Opt65C02Stores", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 125, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2", 200, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 90, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd4 = { OptAdd4, "OptAdd4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd5 = { OptAdd5, "OptAdd5", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptBoolTrans = { OptBoolTrans, "OptBoolTrans", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 42, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3", 75, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 75, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptDecouple = { OptDecouple, "OptDecouple", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget = { OptJumpTarget, "OptJumpTarget", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad1 = { OptLoad1, "OptLoad1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegA1 = { OptNegA1, "OptNegA1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegA2 = { OptNegA2, "OptNegA2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX3 = { OptNegAX3, "OptNegAX3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX4 = { OptNegAX4, "OptNegAX4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPrecalc = { OptPrecalc, "OptPrecalc", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 86, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad8 = { OptPtrLoad8, "OptPtrLoad8", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPush2 = { OptPush2, "OptPush2", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPushPop = { OptPushPop, "OptPushPop", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift3 = { OptShift3, "OptShift3", 110, 0, 0, 0, 0, 0 }; +static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptSize2 = { OptSize2, "OptSize2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore1 = { OptStore1, "OptStore1", 70, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore2 = { OptStore2, "OptStore2", 220, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore3 = { OptStore3, "OptStore3", 120, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore4 = { OptStore4, "OptStore4", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest1 = { OptTest1, "OptTest1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores", 0, 0, 0, 0, 0, 0 }; + + +/* Table containing all the steps in alphabetical order */ +static OptFunc* OptFuncs[] = { + &DOpt65C02BitOps, + &DOpt65C02Ind, + &DOpt65C02Stores, + &DOptAdd1, + &DOptAdd2, + &DOptAdd3, + &DOptAdd4, + &DOptAdd5, + &DOptBoolTrans, + &DOptBranchDist, + &DOptCmp1, + &DOptCmp2, + &DOptCmp3, + &DOptCmp4, + &DOptCmp5, + &DOptCmp6, + &DOptCmp7, + &DOptCmp8, + &DOptCondBranches, + &DOptDeadCode, + &DOptDeadJumps, + &DOptDecouple, + &DOptDupLoads, + &DOptJumpCascades, + &DOptJumpTarget, + &DOptLoad1, + &DOptNegA1, + &DOptNegA2, + &DOptNegAX1, + &DOptNegAX2, + &DOptNegAX3, + &DOptNegAX4, + &DOptPrecalc, + &DOptPtrLoad1, + &DOptPtrLoad2, + &DOptPtrLoad3, + &DOptPtrLoad4, + &DOptPtrLoad5, + &DOptPtrLoad6, + &DOptPtrLoad7, + &DOptPtrLoad8, + &DOptPtrStore1, + &DOptPtrStore2, + &DOptPush1, + &DOptPush2, + &DOptPushPop, + &DOptRTS, + &DOptRTSJumps1, + &DOptRTSJumps2, + &DOptShift1, + &DOptShift2, + &DOptShift3, + &DOptSize1, + &DOptSize2, + &DOptStackOps, + &DOptStore1, + &DOptStore2, + &DOptStore3, + &DOptStore4, + &DOptStoreLoad, + &DOptSub1, + &DOptSub2, + &DOptTest1, + &DOptTransfers1, + &DOptTransfers2, + &DOptUnusedLoads, + &DOptUnusedStores, }; +#define OPTFUNC_COUNT (sizeof(OptFuncs) / sizeof(OptFuncs[0])) + + + +static int CmpOptStep (const void* Key, const void* Func) +/* Compare function for bsearch */ +{ + return strcmp (Key, (*(const OptFunc**)Func)->Name); +} + + + +static OptFunc* FindOptFunc (const char* Name) +/* Find an optimizer step by name in the table and return a pointer. Return + * NULL if no such step is found. + */ +{ + /* Search for the function in the list */ + OptFunc** O = bsearch (Name, OptFuncs, OPTFUNC_COUNT, sizeof (OptFuncs[0]), CmpOptStep); + return O? *O : 0; +} -static OptFunc* FindOptStep (const char* Name) +static OptFunc* GetOptFunc (const char* Name) /* Find an optimizer step by name in the table and return a pointer. Print an * error and call AbEnd if not found. */ { - unsigned I; - - /* Run all optimization steps */ - for (I = 0; I < sizeof(OptFuncs)/sizeof(OptFuncs[0]); ++I) { - if (strcmp (OptFuncs[I].Name, Name) == 0) { - /* Found */ - return OptFuncs+I; - } + /* Search for the function in the list */ + OptFunc* F = FindOptFunc (Name); + if (F == 0) { + /* Not found */ + AbEnd ("Optimization step `%s' not found", Name); } - - /* Not found */ - AbEnd ("Optimization step `%s' not found", Name); - return 0; + return F; } @@ -2406,12 +1905,11 @@ void DisableOpt (const char* Name) { if (strcmp (Name, "any") == 0) { unsigned I; - for (I = 0; I < sizeof(OptFuncs)/sizeof(OptFuncs[0]); ++I) { - OptFuncs[I].Disabled = 1; + for (I = 0; I < OPTFUNC_COUNT; ++I) { + OptFuncs[I]->Disabled = 1; } } else { - OptFunc* F = FindOptStep (Name); - F->Disabled = 1; + GetOptFunc(Name)->Disabled = 1; } } @@ -2422,12 +1920,11 @@ void EnableOpt (const char* Name) { if (strcmp (Name, "any") == 0) { unsigned I; - for (I = 0; I < sizeof(OptFuncs)/sizeof(OptFuncs[0]); ++I) { - OptFuncs[I].Disabled = 0; + for (I = 0; I < OPTFUNC_COUNT; ++I) { + OptFuncs[I]->Disabled = 0; } } else { - OptFunc* F = FindOptStep (Name); - F->Disabled = 0; + GetOptFunc(Name)->Disabled = 0; } } @@ -2437,54 +1934,355 @@ void ListOptSteps (FILE* F) /* List all optimization steps */ { unsigned I; - for (I = 0; I < sizeof(OptFuncs)/sizeof(OptFuncs[0]); ++I) { - fprintf (F, "%s\n", OptFuncs[I].Name); + for (I = 0; I < OPTFUNC_COUNT; ++I) { + fprintf (F, "%s\n", OptFuncs[I]->Name); + } +} + + + +static void ReadOptStats (const char* Name) +/* Read the optimizer statistics file */ +{ + char Buf [256]; + unsigned Lines; + + /* Try to open the file */ + FILE* F = fopen (Name, "r"); + if (F == 0) { + /* Ignore the error */ + return; + } + + /* Read and parse the lines */ + Lines = 0; + while (fgets (Buf, sizeof (Buf), F) != 0) { + + char* B; + unsigned Len; + OptFunc* Func; + + /* Fields */ + char Name[32]; + unsigned long TotalRuns; + unsigned long TotalChanges; + + /* Count lines */ + ++Lines; + + /* Remove trailing white space including the line terminator */ + B = Buf; + Len = strlen (B); + while (Len > 0 && IsSpace (B[Len-1])) { + --Len; + } + B[Len] = '\0'; + + /* Remove leading whitespace */ + while (IsSpace (*B)) { + ++B; + } + + /* Check for empty and comment lines */ + if (*B == '\0' || *B == ';' || *B == '#') { + continue; + } + + /* Parse the line */ + if (sscanf (B, "%31s %lu %*u %lu %*u", Name, &TotalRuns, &TotalChanges) != 3) { + /* Syntax error */ + continue; + } + + /* Search for the optimizer step. */ + Func = FindOptFunc (Name); + if (Func == 0) { + /* Not found */ + continue; + } + + /* Found the step, set the fields */ + Func->TotalRuns = TotalRuns; + Func->TotalChanges = TotalChanges; + } + + /* Close the file, ignore errors here. */ + fclose (F); } -static void RepeatOptStep (CodeSeg* S, unsigned char Type, unsigned Max) -/* Repeat the optimizer step of type Type at may Max times */ +static void WriteOptStats (const char* Name) +/* Write the optimizer statistics file */ { unsigned I; - unsigned Pass = 0; - unsigned OptChanges; - /* Repeat max times of until there are no more changes */ + /* Try to open the file */ + FILE* F = fopen (Name, "w"); + if (F == 0) { + /* Ignore the error */ + return; + } + + /* Write a header */ + fprintf (F, + "; Optimizer Total Last Total Last\n" + "; Step Runs Runs Chg Chg\n"); + + + /* Write the data */ + for (I = 0; I < OPTFUNC_COUNT; ++I) { + const OptFunc* O = OptFuncs[I]; + fprintf (F, + "%-20s %10lu %10lu %10lu %10lu\n", + O->Name, + O->TotalRuns, + O->LastRuns, + O->TotalChanges, + O->LastChanges); + } + + /* Close the file, ignore errors here. */ + fclose (F); +} + + + +static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max) +/* Run one optimizer function Max times or until there are no more changes */ +{ + unsigned Changes, C; + + /* Don't run the function if it is disabled or if it is prohibited by the + * code size factor + */ + if (F->Disabled || F->CodeSizeFactor > S->CodeSizeFactor) { + return 0; + } + + /* Run this until there are no more changes */ + Changes = 0; do { - /* Reset the number of changes */ - OptChanges = 0; - /* Keep the user hapy */ - Print (stdout, 1, " Optimizer pass %u:\n", ++Pass); + /* Run the function */ + C = F->Func (S); + Changes += C; - /* Run all optimization steps */ - for (I = 0; I < sizeof(OptFuncs)/sizeof(OptFuncs[0]); ++I) { + /* Do statistics */ + ++F->TotalRuns; + ++F->LastRuns; + F->TotalChanges += C; + F->LastChanges += C; + + } while (--Max && C > 0); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup1 (CodeSeg* S) +/* Run the first group of optimization steps. These steps translate known + * patterns emitted by the code generator into more optimal patterns. Order + * of the steps is important, because some of the steps done earlier cover + * the same patterns as later steps as subpatterns. + */ +{ + unsigned Changes = 0; + + Changes += RunOptFunc (S, &DOptPtrStore1, 1); + Changes += RunOptFunc (S, &DOptPtrStore2, 1); + Changes += RunOptFunc (S, &DOptPtrLoad1, 1); + Changes += RunOptFunc (S, &DOptPtrLoad2, 1); + Changes += RunOptFunc (S, &DOptPtrLoad3, 1); + Changes += RunOptFunc (S, &DOptPtrLoad4, 1); + Changes += RunOptFunc (S, &DOptPtrLoad5, 1); + Changes += RunOptFunc (S, &DOptPtrLoad6, 1); + Changes += RunOptFunc (S, &DOptPtrLoad7, 1); + Changes += RunOptFunc (S, &DOptNegAX1, 1); + Changes += RunOptFunc (S, &DOptNegAX2, 1); + Changes += RunOptFunc (S, &DOptNegAX3, 1); + Changes += RunOptFunc (S, &DOptNegAX4, 1); + Changes += RunOptFunc (S, &DOptAdd1, 1); + Changes += RunOptFunc (S, &DOptAdd2, 1); + Changes += RunOptFunc (S, &DOptAdd3, 1); + Changes += RunOptFunc (S, &DOptStore4, 1); + Changes += RunOptFunc (S, &DOptShift1, 1); + Changes += RunOptFunc (S, &DOptShift2, 1); + Changes += RunOptFunc (S, &DOptShift3, 1); + Changes += RunOptFunc (S, &DOptStore1, 1); + Changes += RunOptFunc (S, &DOptStore2, 5); + Changes += RunOptFunc (S, &DOptStore3, 5); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup2 (CodeSeg* S) +/* Run one group of optimization steps. This step involves just decoupling + * instructions by replacing them by instructions that do not depend on + * previous instructions. This makes it easier to find instructions that + * aren't used. + */ +{ + unsigned Changes = 0; + + Changes += RunOptFunc (S, &DOptDecouple, 1); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup3 (CodeSeg* S) +/* Run one group of optimization steps. These steps depend on each other, + * that means that one step may allow another step to do additional work, + * so we will repeat the steps as long as we see any changes. + */ +{ + unsigned Changes, C; + + Changes = 0; + do { + C = 0; + + C += RunOptFunc (S, &DOptPtrLoad8, 1); + C += RunOptFunc (S, &DOptNegA1, 1); + C += RunOptFunc (S, &DOptNegA2, 1); + C += RunOptFunc (S, &DOptSub1, 1); + C += RunOptFunc (S, &DOptSub2, 1); + C += RunOptFunc (S, &DOptAdd4, 1); + C += RunOptFunc (S, &DOptAdd5, 1); + C += RunOptFunc (S, &DOptStackOps, 1); + C += RunOptFunc (S, &DOptJumpCascades, 1); + C += RunOptFunc (S, &DOptDeadJumps, 1); + C += RunOptFunc (S, &DOptRTS, 1); + C += RunOptFunc (S, &DOptDeadCode, 1); + C += RunOptFunc (S, &DOptJumpTarget, 1); + C += RunOptFunc (S, &DOptCondBranches, 1); + C += RunOptFunc (S, &DOptRTSJumps1, 1); + C += RunOptFunc (S, &DOptBoolTrans, 1); + C += RunOptFunc (S, &DOptCmp1, 1); + C += RunOptFunc (S, &DOptCmp2, 1); + C += RunOptFunc (S, &DOptCmp3, 1); + C += RunOptFunc (S, &DOptCmp4, 1); + C += RunOptFunc (S, &DOptCmp5, 1); + C += RunOptFunc (S, &DOptCmp6, 1); + C += RunOptFunc (S, &DOptCmp7, 1); + C += RunOptFunc (S, &DOptCmp8, 1); + C += RunOptFunc (S, &DOptTest1, 1); + C += RunOptFunc (S, &DOptLoad1, 1); + C += RunOptFunc (S, &DOptUnusedLoads, 1); + C += RunOptFunc (S, &DOptUnusedStores, 1); + C += RunOptFunc (S, &DOptDupLoads, 1); + C += RunOptFunc (S, &DOptStoreLoad, 1); + C += RunOptFunc (S, &DOptTransfers1, 1); + C += RunOptFunc (S, &DOptPushPop, 1); + C += RunOptFunc (S, &DOptPrecalc, 1); + + Changes += C; + + } while (C); + + /* Return the number of changes */ + return Changes; +} - /* Get the table entry */ - const OptFunc* F = OptFuncs + I; - /* Check if the type matches */ - if (F->Type != Type) { - /* Skip it */ - continue; - } - /* Print the name of the following optimizer step */ - Print (stdout, 1, " %s:%*s", F->Name, (int) (30-strlen(F->Name)), ""); - - /* Check if the step is disabled */ - if (F->Disabled) { - Print (stdout, 1, "Disabled\n"); - } else { - unsigned Changes = F->Func (S); - OptChanges += Changes; - Print (stdout, 1, "%u Changes\n", Changes); - } +static unsigned RunOptGroup4 (CodeSeg* S) +/* 65C02 specific optimizations. */ +{ + unsigned Changes = 0; + + if (CPUIsets[CPU] & CPU_ISET_65SC02) { + Changes += RunOptFunc (S, &DOpt65C02BitOps, 1); + Changes += RunOptFunc (S, &DOpt65C02Ind, 1); + Changes += RunOptFunc (S, &DOpt65C02Stores, 1); + if (Changes) { + /* The 65C02 replacement codes do often make the use of a register + * value unnecessary, so if we have changes, run another load + * removal pass. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); } + } + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup5 (CodeSeg* S) +/* Run another round of pattern replacements. These are done late, since there + * may be better replacements before. + */ +{ + unsigned Changes = 0; + + Changes += RunOptFunc (S, &DOptPush1, 1); + Changes += RunOptFunc (S, &DOptPush2, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptTransfers2, 1); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup6 (CodeSeg* S) +/* The last group of optimization steps. Adjust branches, do size optimizations. + */ +{ + unsigned Changes = 0; + unsigned C; + + if (S->CodeSizeFactor <= 100) { + /* Optimize for size, that is replace operations by shorter ones, even + * if this does hinder further optimizations (no problem since we're + * done soon). + */ + C = RunOptFunc (S, &DOptSize1, 1); + if (C) { + Changes += C; + /* Run some optimization passes again, since the size optimizations + * may have opened new oportunities. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptJumpTarget, 5); + } + } + C = RunOptFunc (S, &DOptSize2, 1); + if (C) { + Changes += C; + /* Run some optimization passes again, since the size optimizations + * may have opened new oportunities. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptJumpTarget, 5); + } + + /* Adjust branch distances */ + Changes += RunOptFunc (S, &DOptBranchDist, 3); + + /* Replace conditional branches to RTS. If we had changes, we must run dead + * code elimination again, since the change may have introduced dead code. + */ + C = RunOptFunc (S, &DOptRTSJumps2, 1); + Changes += C; + if (C) { + Changes += RunOptFunc (S, &DOptDeadCode, 1); + } - } while (--Max > 0 && OptChanges > 0); + /* Return the number of changes */ + return Changes; } @@ -2492,10 +2290,17 @@ static void RepeatOptStep (CodeSeg* S, unsigned char Type, unsigned Max) void RunOpt (CodeSeg* S) /* Run the optimizer */ { + const char* StatFileName; /* If we shouldn't run the optimizer, bail out */ - if (!Optimize) { - return; + if (!S->Optimize) { + return; + } + + /* Check if we are requested to write optimizer statistics */ + StatFileName = getenv ("CC65_OPTSTATS"); + if (StatFileName) { + ReadOptStats (StatFileName); } /* Print the name of the function we are working on */ @@ -2505,12 +2310,18 @@ void RunOpt (CodeSeg* S) Print (stdout, 1, "Running optimizer for global code segment\n"); } - /* Repeat all steps until there are no more changes */ - RepeatOptStep (S, optPre, 1); - RepeatOptStep (S, optPreMain, 0xFFFF); - RepeatOptStep (S, optMain, 0xFFFF); - RepeatOptStep (S, optPostMain, 0xFFFF); - RepeatOptStep (S, optPost, 1); + /* Run groups of optimizations */ + RunOptGroup1 (S); + RunOptGroup2 (S); + RunOptGroup3 (S); + RunOptGroup4 (S); + RunOptGroup5 (S); + RunOptGroup6 (S); + + /* Write statistics */ + if (StatFileName) { + WriteOptStats (StatFileName); + } }