X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fcodeopt.c;h=30ffd605428e25b36ad5fd080d8f01566cfd3ac4;hb=112ae0e3db511ddd92e769c11328646ebe2a6240;hp=abfd6ac1ba788ec36782f693e38964d51e4383e2;hpb=61195d914ee453620d29d1bc5541d2e9f89e15e4;p=cc65 diff --git a/src/cc65/codeopt.c b/src/cc65/codeopt.c index abfd6ac1b..30ffd6054 100644 --- a/src/cc65/codeopt.c +++ b/src/cc65/codeopt.c @@ -6,10 +6,10 @@ /* */ /* */ /* */ -/* (C) 2001-2009 Ullrich von Bassewitz */ -/* Roemerstrasse 52 */ -/* D-70794 Filderstadt */ -/* EMail: uz@cc65.org */ +/* (C) 2001-2010, Ullrich von Bassewitz */ +/* Roemerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ /* */ /* */ /* This software is provided 'as-is', without any expressed or implied */ @@ -42,6 +42,7 @@ #include "cpu.h" #include "print.h" #include "xmalloc.h" +#include "xsprintf.h" /* cc65 */ #include "asmlabel.h" @@ -90,15 +91,30 @@ enum { static unsigned OptShift1 (CodeSeg* S) /* A call to the shlaxN routine may get replaced by one or more asl insns - * if the value of X is not used later. + * if the value of X is not used later. If X is used later, but it is zero + * on entry and it's a shift by one, it may get replaced by: + * + * asl a + * bcc L1 + * inx + * L1: + * */ { unsigned Changes = 0; + unsigned I; + + /* Generate register info */ + CS_GenRegInfo (S); /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { + CodeEntry* N; + CodeEntry* X; + CodeLabel* L; + /* Get next entry */ CodeEntry* E = CS_GetEntry (S, I); @@ -107,21 +123,46 @@ static unsigned OptShift1 (CodeSeg* S) (strncmp (E->Arg, "shlax", 5) == 0 || strncmp (E->Arg, "aslax", 5) == 0) && strlen (E->Arg) == 6 && - IsDigit (E->Arg[5]) && - !RegXUsed (S, I+1)) { + IsDigit (E->Arg[5])) { - /* Insert shift insns */ - unsigned Count = E->Arg[5] - '0'; - while (Count--) { - CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); - } + if (!RegXUsed (S, I+1)) { - /* Delete the call to shlax */ - CS_DelEntry (S, I); + /* Insert shift insns */ + unsigned Count = E->Arg[5] - '0'; + while (Count--) { + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); + } - /* Remember, we had changes */ - ++Changes; + /* Delete the call to shlax */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + + } else if (E->RI->In.RegX == 0 && + E->Arg[5] == '1' && + (N = CS_GetNextEntry (S, I)) != 0) { + + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, I+1); + + /* bcc L1 */ + L = CS_GenLabel (S, N); + X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, E->LI); + CS_InsertEntry (S, X, I+2); + + /* inx */ + X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+3); + + /* Delete the call to shlax */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + } } @@ -130,6 +171,9 @@ static unsigned OptShift1 (CodeSeg* S) } + /* Free the register info */ + CS_FreeRegInfo (S); + /* Return the number of changes made */ return Changes; } @@ -168,7 +212,7 @@ static unsigned OptShift2 (CodeSeg* S) CS_InsertEntry (S, X, I+1); } - /* Delete the call to shlax */ + /* Delete the call to shrax */ CS_DelEntry (S, I); /* Remember, we had changes */ @@ -332,6 +376,93 @@ static unsigned OptShift3 (CodeSeg* S) +static unsigned OptShift4 (CodeSeg* S) +/* Inline the shift subroutines. */ +{ + unsigned Changes = 0; + + /* Walk over the entries */ + unsigned I = 0; + while (I < CS_GetEntryCount (S)) { + + CodeEntry* X; + unsigned IP; + + /* Get next entry */ + CodeEntry* E = CS_GetEntry (S, I); + + /* Check for a call to one of the shift routine */ + if (E->OPC == OP65_JSR && + (strncmp (E->Arg, "shlax", 5) == 0 || + strncmp (E->Arg, "aslax", 5) == 0) && + strlen (E->Arg) == 6 && + IsDigit (E->Arg[5])) { + + /* Get number of shifts */ + unsigned ShiftCount = (E->Arg[5] - '0'); + + /* Code is: + * + * stx tmp1 + * asl a + * rol tmp1 + * (repeat ShiftCount-1 times) + * ldx tmp1 + * + * which makes 4 + 3 * ShiftCount bytes, compared to the original + * 3 bytes for the subroutine call. However, in most cases, the + * final load of the X register gets merged with some other insn + * and replaces a txa, so for a shift count of 1, we get a factor + * of 200, which matches nicely the CodeSizeFactor enabled with -Oi + */ + if (ShiftCount > 1 || S->CodeSizeFactor > 200) { + unsigned Size = 4 + 3 * ShiftCount; + if ((Size * 100 / 3) > S->CodeSizeFactor) { + /* Not acceptable */ + goto NextEntry; + } + } + + /* Inline the code. Insertion point is behind the subroutine call */ + IP = (I + 1); + + /* stx tmp1 */ + X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + while (ShiftCount--) { + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + /* rol tmp1 */ + X = NewCodeEntry (OP65_ROL, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + } + + /* ldx tmp1 */ + X = NewCodeEntry (OP65_LDX, AM65_ZP, "tmp1", 0, E->LI); + CS_InsertEntry (S, X, IP++); + + /* Remove the subroutine call */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + } + +NextEntry: + /* Next entry */ + ++I; + + } + + /* Return the number of changes made */ + return Changes; +} + + + /*****************************************************************************/ /* Optimize loads */ /*****************************************************************************/ @@ -396,6 +527,60 @@ static unsigned OptLoad1 (CodeSeg* S) +static unsigned OptLoad2 (CodeSeg* S) +/* Replace calls to ldaxysp by inline code */ +{ + unsigned I; + unsigned Changes = 0; + + /* Walk over the entries */ + I = 0; + while (I < CS_GetEntryCount (S)) { + + CodeEntry* E; + + /* Get next entry */ + E = CS_GetEntry (S, I); + + /* Check for the sequence */ + if (CE_IsCallTo (E, "ldaxysp")) { + + CodeEntry* X; + + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+1); + + /* tax */ + X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+2); + + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI); + CS_InsertEntry (S, X, I+3); + + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, E->LI); + CS_InsertEntry (S, X, I+4); + + /* Now remove the call to the subroutine */ + CS_DelEntry (S, I); + + /* Remember, we had changes */ + ++Changes; + + } + + /* Next entry */ + ++I; + } + + /* Return the number of changes made */ + return Changes; +} + + + /*****************************************************************************/ /* Optimize stores through pointers */ /*****************************************************************************/ @@ -686,6 +871,121 @@ static unsigned OptPtrStore2 (CodeSeg* S) +static unsigned OptPtrStore3 (CodeSeg* S) +/* Search for the sequence: + * + * lda #<(label+0) + * ldx #>(label+0) + * ldy aaa + * clc + * adc (sp),y + * bcc L + * inx + * L: jsr pushax + * ldx #$00 + * lda yyy + * ldy #$00 + * jsr staspidx + * + * and replace it by: + * + * ldy aaa + * ldx #$00 + * lda (sp),y + * tay + * lda yyy + * sta label,y + */ +{ + unsigned Changes = 0; + + /* Walk over the entries */ + unsigned I = 0; + while (I < CS_GetEntryCount (S)) { + + CodeEntry* L[12]; + unsigned Len; + + /* Get next entry */ + L[0] = CS_GetEntry (S, I); + + /* Check for the sequence */ + if (L[0]->OPC == OP65_LDA && + L[0]->AM == AM65_IMM && + CS_GetEntries (S, L+1, I+1, 11) && + L[1]->OPC == OP65_LDX && + L[1]->AM == AM65_IMM && + L[2]->OPC == OP65_LDY && + L[3]->OPC == OP65_CLC && + L[4]->OPC == OP65_ADC && + L[4]->AM == AM65_ZP_INDY && + strcmp (L[4]->Arg, "sp") == 0 && + (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) && + L[5]->JumpTo != 0 && + L[5]->JumpTo->Owner == L[7] && + L[6]->OPC == OP65_INX && + CE_IsCallTo (L[7], "pushax") && + L[8]->OPC == OP65_LDX && + L[9]->OPC == OP65_LDA && + L[10]->OPC == OP65_LDY && + CE_IsKnownImm (L[10], 0) && + CE_IsCallTo (L[11], "staspidx") && + !CS_RangeHasLabel (S, I+1, 6) && + !CS_RangeHasLabel (S, I+8, 4) && + /* Check the label last because this is quite costly */ + (Len = strlen (L[0]->Arg)) > 3 && + L[0]->Arg[0] == '<' && + L[0]->Arg[1] == '(' && + strlen (L[1]->Arg) == Len && + L[1]->Arg[0] == '>' && + memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { + + CodeEntry* X; + char* Label; + + /* We will create all the new stuff behind the current one so + * we keep the line references. + */ + X = NewCodeEntry (OP65_LDY, L[2]->AM, L[2]->Arg, 0, L[2]->LI); + CS_InsertEntry (S, X, I+12); + + X = NewCodeEntry (OP65_LDX, L[8]->AM, L[8]->Arg, 0, L[8]->LI); + CS_InsertEntry (S, X, I+13); + + X = NewCodeEntry (OP65_LDA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); + CS_InsertEntry (S, X, I+14); + + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[4]->LI); + CS_InsertEntry (S, X, I+15); + + X = NewCodeEntry (OP65_LDA, L[9]->AM, L[9]->Arg, 0, L[9]->LI); + CS_InsertEntry (S, X, I+16); + + Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); + Label[Len-3] = '\0'; + X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[11]->LI); + CS_InsertEntry (S, X, I+17); + xfree (Label); + + /* Remove the old code */ + CS_DelEntries (S, I, 12); + + /* Remember, we had changes */ + ++Changes; + + } + + /* Next entry */ + ++I; + + } + + /* Return the number of changes made */ + return Changes; +} + + + /*****************************************************************************/ /* Decouple operations */ /*****************************************************************************/ @@ -784,7 +1084,7 @@ static unsigned OptDecouple (CodeSeg* S) case REG_PTR1_LO: Arg = MakeHexArg (In->Ptr1Lo); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); break; case REG_PTR1_HI: @@ -890,7 +1190,7 @@ static unsigned OptDecouple (CodeSeg* S) case OP65_TYA: if (E->RI->In.RegY >= 0) { - Arg = MakeHexArg (In->RegY); + Arg = MakeHexArg (In->RegY); X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); } break; @@ -922,6 +1222,106 @@ static unsigned OptDecouple (CodeSeg* S) +/*****************************************************************************/ +/* Optimize stack pointer ops */ +/*****************************************************************************/ + + + +static unsigned IsDecSP (const CodeEntry* E) +/* Check if this is an insn that decrements the stack pointer. If so, return + * the decrement. If not, return zero. + * The function expects E to be a subroutine call. + */ +{ + if (strncmp (E->Arg, "decsp", 5) == 0) { + if (E->Arg[5] >= '1' && E->Arg[5] <= '8') { + return (E->Arg[5] - '0'); + } + } else if (strcmp (E->Arg, "subysp") == 0 && RegValIsKnown (E->RI->In.RegY)) { + return E->RI->In.RegY; + } + + /* If we come here, it's not a decsp op */ + return 0; +} + + + +static unsigned OptStackPtrOps (CodeSeg* S) +/* Merge adjacent calls to decsp into one. NOTE: This function won't merge all + * known cases! + */ +{ + unsigned Changes = 0; + unsigned I; + + /* Generate register info for the following step */ + CS_GenRegInfo (S); + + /* Walk over the entries */ + I = 0; + while (I < CS_GetEntryCount (S)) { + + unsigned Dec1; + unsigned Dec2; + const CodeEntry* N; + + /* Get the next entry */ + const CodeEntry* E = CS_GetEntry (S, I); + + /* Check for decspn or subysp */ + if (E->OPC == OP65_JSR && + (Dec1 = IsDecSP (E)) > 0 && + (N = CS_GetNextEntry (S, I)) != 0 && + (Dec2 = IsDecSP (N)) > 0 && + (Dec1 += Dec2) <= 255 && + !CE_HasLabel (N)) { + + CodeEntry* X; + char Buf[20]; + + /* We can combine the two */ + if (Dec1 <= 8) { + /* Insert a call to decsp */ + xsprintf (Buf, sizeof (Buf), "decsp%u", Dec1); + X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, N->LI); + CS_InsertEntry (S, X, I+2); + } else { + /* Insert a call to subysp */ + const char* Arg = MakeHexArg (Dec1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, N->LI); + CS_InsertEntry (S, X, I+2); + X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp", 0, N->LI); + CS_InsertEntry (S, X, I+3); + } + + /* Delete the old code */ + CS_DelEntries (S, I, 2); + + /* Regenerate register info */ + CS_GenRegInfo (S); + + /* Remember we had changes */ + ++Changes; + + } else { + + /* Next entry */ + ++I; + } + + } + + /* Free register info */ + CS_FreeRegInfo (S); + + /* Return the number of changes made */ + return Changes; +} + + + /*****************************************************************************/ /* struct OptFunc */ /*****************************************************************************/ @@ -968,14 +1368,21 @@ static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 85, 0, 0, 0, 0, 0 }; static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp9 = { OptCmp9, "OptCmp9", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches1= { OptCondBranches1,"OptCondBranches1", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches2= { OptCondBranches2,"OptCondBranches2", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDecouple = { OptDecouple, "OptDecouple", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads1 = { OptIndLoads1, "OptIndLoads1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads2 = { OptIndLoads2, "OptIndLoads2", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptJumpTarget = { OptJumpTarget, "OptJumpTarget", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget1 = { OptJumpTarget1, "OptJumpTarget1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget2 = { OptJumpTarget2, "OptJumpTarget2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget3 = { OptJumpTarget3, "OptJumpTarget3", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptLoad1 = { OptLoad1, "OptLoad1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad2 = { OptLoad2, "OptLoad2", 200, 0, 0, 0, 0, 0 }; static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 }; @@ -990,26 +1397,32 @@ static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 92, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 65, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad8 = { OptPtrLoad8, "OptPtrLoad8", 108, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad9 = { OptPtrLoad9, "OptPtrLoad9", 86, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad10 = { OptPtrLoad10, "OptPtrLoad10", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad11 = { OptPtrLoad11, "OptPtrLoad11", 190, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 140, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad11 = { OptPtrLoad11, "OptPtrLoad11", 92, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad12 = { OptPtrLoad12, "OptPtrLoad12", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad13 = { OptPtrLoad13, "OptPtrLoad13", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad14 = { OptPtrLoad14, "OptPtrLoad14", 108, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad15 = { OptPtrLoad15, "OptPtrLoad15", 86, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad16 = { OptPtrLoad16, "OptPtrLoad16", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad17 = { OptPtrLoad17, "OptPtrLoad17", 190, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore3 = { OptPtrStore3, "OptPtrStore3", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 }; static OptFunc DOptPush2 = { OptPush2, "OptPush2", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptPushPop = { OptPushPop, "OptPushPop", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptShift3 = { OptShift3, "OptShift3", 110, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift4 = { OptShift4, "OptShift4", 200, 0, 0, 0, 0, 0 }; static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSize2 = { OptSize2, "OptSize2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStackPtrOps = { OptStackPtrOps, "OptStackPtrOps", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptStore1 = { OptStore1, "OptStore1", 70, 0, 0, 0, 0, 0 }; -static OptFunc DOptStore2 = { OptStore2, "OptStore2", 220, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore2 = { OptStore2, "OptStore2", 115, 0, 0, 0, 0, 0 }; static OptFunc DOptStore3 = { OptStore3, "OptStore3", 120, 0, 0, 0, 0, 0 }; static OptFunc DOptStore4 = { OptStore4, "OptStore4", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptStore5 = { OptStore5, "OptStore5", 100, 0, 0, 0, 0, 0 }; @@ -1017,7 +1430,8 @@ static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSub3 = { OptSub3, "OptSub3", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptTest1 = { OptTest1, "OptTest1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest1 = { OptTest1, "OptTest1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest2 = { OptTest2, "OptTest2", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1", 0, 0, 0, 0, 0, 0 }; static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2", 60, 0, 0, 0, 0, 0 }; static OptFunc DOptTransfers3 = { OptTransfers3, "OptTransfers3", 65, 0, 0, 0, 0, 0 }; @@ -1047,14 +1461,21 @@ static OptFunc* OptFuncs[] = { &DOptCmp6, &DOptCmp7, &DOptCmp8, - &DOptCondBranches, + &DOptCmp9, + &DOptCondBranches1, + &DOptCondBranches2, &DOptDeadCode, &DOptDeadJumps, &DOptDecouple, &DOptDupLoads, + &DOptIndLoads1, + &DOptIndLoads2, &DOptJumpCascades, - &DOptJumpTarget, + &DOptJumpTarget1, + &DOptJumpTarget2, + &DOptJumpTarget3, &DOptLoad1, + &DOptLoad2, &DOptNegA1, &DOptNegA2, &DOptNegAX1, @@ -1063,18 +1484,22 @@ static OptFunc* OptFuncs[] = { &DOptNegAX4, &DOptPrecalc, &DOptPtrLoad1, - &DOptPtrLoad10, &DOptPtrLoad11, + &DOptPtrLoad12, + &DOptPtrLoad13, + &DOptPtrLoad14, + &DOptPtrLoad15, + &DOptPtrLoad16, + &DOptPtrLoad17, &DOptPtrLoad2, &DOptPtrLoad3, &DOptPtrLoad4, &DOptPtrLoad5, &DOptPtrLoad6, &DOptPtrLoad7, - &DOptPtrLoad8, - &DOptPtrLoad9, &DOptPtrStore1, &DOptPtrStore2, + &DOptPtrStore3, &DOptPush1, &DOptPush2, &DOptPushPop, @@ -1084,9 +1509,11 @@ static OptFunc* OptFuncs[] = { &DOptShift1, &DOptShift2, &DOptShift3, + &DOptShift4, &DOptSize1, &DOptSize2, &DOptStackOps, + &DOptStackPtrOps, &DOptStore1, &DOptStore2, &DOptStore3, @@ -1097,6 +1524,7 @@ static OptFunc* OptFuncs[] = { &DOptSub2, &DOptSub3, &DOptTest1, + &DOptTest2, &DOptTransfers1, &DOptTransfers2, &DOptTransfers3, @@ -1336,8 +1764,10 @@ static unsigned RunOptGroup1 (CodeSeg* S) { unsigned Changes = 0; + Changes += RunOptFunc (S, &DOptStackPtrOps, 5); Changes += RunOptFunc (S, &DOptPtrStore1, 1); Changes += RunOptFunc (S, &DOptPtrStore2, 1); + Changes += RunOptFunc (S, &DOptPtrStore3, 1); Changes += RunOptFunc (S, &DOptAdd3, 1); /* Before OptPtrLoad5! */ Changes += RunOptFunc (S, &DOptPtrLoad1, 1); Changes += RunOptFunc (S, &DOptPtrLoad2, 1); @@ -1346,10 +1776,13 @@ static unsigned RunOptGroup1 (CodeSeg* S) Changes += RunOptFunc (S, &DOptPtrLoad5, 1); Changes += RunOptFunc (S, &DOptPtrLoad6, 1); Changes += RunOptFunc (S, &DOptPtrLoad7, 1); - Changes += RunOptFunc (S, &DOptPtrLoad8, 1); - Changes += RunOptFunc (S, &DOptPtrLoad9, 1); - Changes += RunOptFunc (S, &DOptPtrLoad10, 1); Changes += RunOptFunc (S, &DOptPtrLoad11, 1); + Changes += RunOptFunc (S, &DOptPtrLoad12, 1); + Changes += RunOptFunc (S, &DOptPtrLoad13, 1); + Changes += RunOptFunc (S, &DOptPtrLoad14, 1); + Changes += RunOptFunc (S, &DOptPtrLoad15, 1); + Changes += RunOptFunc (S, &DOptPtrLoad16, 1); + Changes += RunOptFunc (S, &DOptPtrLoad17, 1); Changes += RunOptFunc (S, &DOptNegAX1, 1); Changes += RunOptFunc (S, &DOptNegAX2, 1); Changes += RunOptFunc (S, &DOptNegAX3, 1); @@ -1357,11 +1790,17 @@ static unsigned RunOptGroup1 (CodeSeg* S) Changes += RunOptFunc (S, &DOptAdd1, 1); Changes += RunOptFunc (S, &DOptAdd2, 1); Changes += RunOptFunc (S, &DOptAdd4, 1); + Changes += RunOptFunc (S, &DOptAdd5, 1); + Changes += RunOptFunc (S, &DOptAdd6, 1); + Changes += RunOptFunc (S, &DOptAdd6, 1); + Changes += RunOptFunc (S, &DOptSub1, 1); + Changes += RunOptFunc (S, &DOptSub3, 1); Changes += RunOptFunc (S, &DOptStore4, 1); Changes += RunOptFunc (S, &DOptStore5, 1); Changes += RunOptFunc (S, &DOptShift1, 1); Changes += RunOptFunc (S, &DOptShift2, 1); Changes += RunOptFunc (S, &DOptShift3, 1); + Changes += RunOptFunc (S, &DOptShift4, 1); Changes += RunOptFunc (S, &DOptStore1, 1); Changes += RunOptFunc (S, &DOptStore2, 5); Changes += RunOptFunc (S, &DOptStore3, 5); @@ -1403,20 +1842,22 @@ static unsigned RunOptGroup3 (CodeSeg* S) C += RunOptFunc (S, &DOptNegA1, 1); C += RunOptFunc (S, &DOptNegA2, 1); + C += RunOptFunc (S, &DOptStackOps, 1); C += RunOptFunc (S, &DOptSub1, 1); C += RunOptFunc (S, &DOptSub2, 1); C += RunOptFunc (S, &DOptSub3, 1); C += RunOptFunc (S, &DOptAdd5, 1); C += RunOptFunc (S, &DOptAdd6, 1); - C += RunOptFunc (S, &DOptStackOps, 1); C += RunOptFunc (S, &DOptJumpCascades, 1); C += RunOptFunc (S, &DOptDeadJumps, 1); C += RunOptFunc (S, &DOptRTS, 1); C += RunOptFunc (S, &DOptDeadCode, 1); - C += RunOptFunc (S, &DOptJumpTarget, 1); - C += RunOptFunc (S, &DOptCondBranches, 1); - C += RunOptFunc (S, &DOptRTSJumps1, 1); C += RunOptFunc (S, &DOptBoolTrans, 1); + C += RunOptFunc (S, &DOptJumpTarget1, 1); + C += RunOptFunc (S, &DOptJumpTarget2, 1); + C += RunOptFunc (S, &DOptCondBranches1, 1); + C += RunOptFunc (S, &DOptCondBranches2, 1); + C += RunOptFunc (S, &DOptRTSJumps1, 1); C += RunOptFunc (S, &DOptCmp1, 1); C += RunOptFunc (S, &DOptCmp2, 1); C += RunOptFunc (S, &DOptCmp3, 1); @@ -1425,8 +1866,10 @@ static unsigned RunOptGroup3 (CodeSeg* S) C += RunOptFunc (S, &DOptCmp6, 1); C += RunOptFunc (S, &DOptCmp7, 1); C += RunOptFunc (S, &DOptCmp8, 1); + C += RunOptFunc (S, &DOptCmp9, 1); C += RunOptFunc (S, &DOptTest1, 1); C += RunOptFunc (S, &DOptLoad1, 1); + C += RunOptFunc (S, &DOptJumpTarget3, 1); /* After OptCondBranches2 */ C += RunOptFunc (S, &DOptUnusedLoads, 1); C += RunOptFunc (S, &DOptUnusedStores, 1); C += RunOptFunc (S, &DOptDupLoads, 1); @@ -1450,6 +1893,27 @@ static unsigned RunOptGroup3 (CodeSeg* S) static unsigned RunOptGroup4 (CodeSeg* S) +/* Run another round of pattern replacements. These are done late, since there + * may be better replacements before. + */ +{ + unsigned Changes = 0; + + /* Repeat some of the steps here */ + Changes += RunOptFunc (S, &DOptPush1, 1); + Changes += RunOptFunc (S, &DOptPush2, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptTest2, 1); + Changes += RunOptFunc (S, &DOptTransfers2, 1); + Changes += RunOptFunc (S, &DOptLoad2, 1); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup5 (CodeSeg* S) /* 65C02 specific optimizations. */ { unsigned Changes = 0; @@ -1473,17 +1937,25 @@ static unsigned RunOptGroup4 (CodeSeg* S) -static unsigned RunOptGroup5 (CodeSeg* S) -/* Run another round of pattern replacements. These are done late, since there - * may be better replacements before. +static unsigned RunOptGroup6 (CodeSeg* S) +/* This one is quite special. It tries to replace "lda (sp),y" by "lda (sp,x)". + * The latter is ony cycle slower, but if we're able to remove the necessary + * load of the Y register, because X is zero anyway, we gain 1 cycle and + * shorten the code by one (transfer) or two bytes (load). So what we do is + * to replace the insns, remove unused loads, and then change back all insns + * where Y is still zero (meaning that the load has not been removed). */ { unsigned Changes = 0; - Changes += RunOptFunc (S, &DOptPush1, 1); - Changes += RunOptFunc (S, &DOptPush2, 1); - Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptTransfers2, 1); + /* This group will only run for a standard 6502, because the 65C02 has a + * better addressing mode that covers this case. + */ + if ((CPUIsets[CPU] & CPU_ISET_65SC02) == 0) { + Changes += RunOptFunc (S, &DOptIndLoads1, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptIndLoads2, 1); + } /* Return the number of changes */ return Changes; @@ -1491,7 +1963,7 @@ static unsigned RunOptGroup5 (CodeSeg* S) -static unsigned RunOptGroup6 (CodeSeg* S) +static unsigned RunOptGroup7 (CodeSeg* S) /* The last group of optimization steps. Adjust branches, do size optimizations. */ { @@ -1509,7 +1981,8 @@ static unsigned RunOptGroup6 (CodeSeg* S) * may have opened new oportunities. */ Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptJumpTarget, 5); + Changes += RunOptFunc (S, &DOptUnusedStores, 1); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); Changes += RunOptFunc (S, &DOptStore5, 1); } @@ -1520,8 +1993,9 @@ static unsigned RunOptGroup6 (CodeSeg* S) * may have opened new oportunities. */ Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptJumpTarget, 5); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); Changes += RunOptFunc (S, &DOptStore5, 1); + Changes += RunOptFunc (S, &DOptTransfers3, 1); } /* Adjust branch distances */ @@ -1572,6 +2046,7 @@ void RunOpt (CodeSeg* S) RunOptGroup4 (S); RunOptGroup5 (S); RunOptGroup6 (S); + RunOptGroup7 (S); /* Write statistics */ if (StatFileName) {