X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fcodeopt.c;h=d920f001a6134488cad838e84aa3302a56e45db3;hb=5d274e4bc5b81002146e72602dae235dcd4c591e;hp=e26e747f42c2d8451d847e85015fbed322fac581;hpb=ae708289384a8db26ed12168c00af697f1d0b78d;p=cc65 diff --git a/src/cc65/codeopt.c b/src/cc65/codeopt.c index e26e747f4..d920f001a 100644 --- a/src/cc65/codeopt.c +++ b/src/cc65/codeopt.c @@ -1,15 +1,15 @@ /*****************************************************************************/ /* */ -/* codeopt.c */ +/* codeopt.c */ /* */ -/* Optimizer subroutines */ +/* Optimizer subroutines */ /* */ /* */ /* */ -/* (C) 2001-2003 Ullrich von Bassewitz */ -/* Römerstraße 52 */ -/* D-70794 Filderstadt */ -/* EMail: uz@cc65.org */ +/* (C) 2001-2012, Ullrich von Bassewitz */ +/* Roemerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ /* */ /* */ /* This software is provided 'as-is', without any expressed or implied */ @@ -35,24 +35,32 @@ #include #include +#include /* common */ #include "abend.h" #include "chartype.h" #include "cpu.h" +#include "debugflag.h" #include "print.h" +#include "strbuf.h" #include "xmalloc.h" +#include "xsprintf.h" /* cc65 */ #include "asmlabel.h" #include "codeent.h" #include "codeinfo.h" +#include "codeopt.h" #include "coptadd.h" #include "coptc02.h" #include "coptcmp.h" #include "coptind.h" #include "coptneg.h" +#include "coptptrload.h" +#include "coptptrstore.h" #include "coptpush.h" +#include "coptshift.h" #include "coptsize.h" #include "coptstop.h" #include "coptstore.h" @@ -60,275 +68,8 @@ #include "copttest.h" #include "error.h" #include "global.h" -#include "codeopt.h" - - - -/*****************************************************************************/ -/* Data */ -/*****************************************************************************/ - - - -/* Shift types */ -enum { - SHIFT_NONE, - SHIFT_ASR_1, - SHIFT_ASL_1, - SHIFT_LSR_1, - SHIFT_LSL_1 -}; - - - -/*****************************************************************************/ -/* Optimize shifts */ -/*****************************************************************************/ - - - -static unsigned OptShift1 (CodeSeg* S) -/* A call to the shlaxN routine may get replaced by one or more asl insns - * if the value of X is not used later. - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_JSR && - (strncmp (E->Arg, "shlax", 5) == 0 || - strncmp (E->Arg, "aslax", 5) == 0) && - strlen (E->Arg) == 6 && - IsDigit (E->Arg[5]) && - !RegXUsed (S, I+1)) { - - /* Insert shift insns */ - unsigned Count = E->Arg[5] - '0'; - while (Count--) { - CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); - } - - /* Delete the call to shlax */ - CS_DelEntry (S, I); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned OptShift2 (CodeSeg* S) -/* A call to the shraxN routine may get replaced by one or more lsr insns - * if the value of X is zero. - */ -{ - unsigned Changes = 0; - unsigned I; - - /* Generate register info */ - CS_GenRegInfo (S); - - /* Walk over the entries */ - I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (E->OPC == OP65_JSR && - strncmp (E->Arg, "shrax", 5) == 0 && - strlen (E->Arg) == 6 && - IsDigit (E->Arg[5]) && - E->RI->In.RegX == 0) { - - /* Insert shift insns */ - unsigned Count = E->Arg[5] - '0'; - while (Count--) { - CodeEntry* X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI); - CS_InsertEntry (S, X, I+1); - } - - /* Delete the call to shlax */ - CS_DelEntry (S, I); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Free the register info */ - CS_FreeRegInfo (S); - - /* Return the number of changes made */ - return Changes; -} - - - -static unsigned GetShiftType (const char* Sub) -/* Helper function for OptShift3 */ -{ - if (*Sub == 'a') { - if (strcmp (Sub+1, "slax1") == 0) { - return SHIFT_ASL_1; - } else if (strcmp (Sub+1, "srax1") == 0) { - return SHIFT_ASR_1; - } - } else if (*Sub == 's') { - if (strcmp (Sub+1, "hlax1") == 0) { - return SHIFT_LSL_1; - } else if (strcmp (Sub+1, "hrax1") == 0) { - return SHIFT_LSR_1; - } - } - return SHIFT_NONE; -} - - - -static unsigned OptShift3 (CodeSeg* S) -/* Search for the sequence - * - * lda xxx - * ldx yyy - * jsr aslax1/asrax1/shlax1/shrax1 - * sta aaa - * stx bbb - * - * and replace it by - * - * lda xxx - * asl a - * sta aaa - * lda yyy - * rol a - * sta bbb - * - * or similar, provided that a/x is not used later - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - unsigned ShiftType; - CodeEntry* L[5]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && - (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) && - CS_GetEntries (S, L+1, I+1, 4) && - !CS_RangeHasLabel (S, I+1, 4) && - L[1]->OPC == OP65_LDX && - (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) && - L[2]->OPC == OP65_JSR && - (ShiftType = GetShiftType (L[2]->Arg)) != SHIFT_NONE&& - L[3]->OPC == OP65_STA && - (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && - L[4]->OPC == OP65_STX && - (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) && - !RegAXUsed (S, I+5)) { - - CodeEntry* X; - - /* Handle the four shift types differently */ - switch (ShiftType) { - - case SHIFT_ASR_1: - X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); - CS_InsertEntry (S, X, I+5); - X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI); - CS_InsertEntry (S, X, I+6); - X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+7); - X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); - CS_InsertEntry (S, X, I+8); - X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+9); - X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+10); - X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+11); - CS_DelEntries (S, I, 5); - break; - - case SHIFT_LSR_1: - X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); - CS_InsertEntry (S, X, I+5); - X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+6); - X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); - CS_InsertEntry (S, X, I+7); - X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+8); - X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+9); - X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+10); - CS_DelEntries (S, I, 5); - break; - - case SHIFT_LSL_1: - case SHIFT_ASL_1: - /* These two are identical */ - X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+1); - X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+2); - X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI); - CS_InsertEntry (S, X, I+4); - X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); - CS_InsertEntry (S, X, I+5); - CS_DelEntries (S, I+6, 4); - break; - - } - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} - +#include "output.h" +#include "symtab.h" /*****************************************************************************/ @@ -339,26 +80,23 @@ static unsigned OptShift3 (CodeSeg* S) static unsigned OptLoad1 (CodeSeg* S) /* Search for a call to ldaxysp where X is not used later and replace it by - * a load of just the A register. - */ +** a load of just the A register. +*/ { unsigned I; unsigned Changes = 0; - /* Generate register info */ - CS_GenRegInfo (S); - /* Walk over the entries */ I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* E; + CodeEntry* E; - /* Get next entry */ - E = CS_GetEntry (S, I); + /* Get next entry */ + E = CS_GetEntry (S, I); - /* Check for the sequence */ - if (CE_IsCallTo (E, "ldaxysp") && + /* Check for the sequence */ + if (CE_IsCallTo (E, "ldaxysp") && RegValIsKnown (E->RI->In.RegY) && !RegXUsed (S, I+1)) { @@ -374,428 +112,113 @@ static unsigned OptLoad1 (CodeSeg* S) CS_InsertEntry (S, X, I+2); /* Now remove the call to the subroutine */ - CS_DelEntry (S, I); + CS_DelEntry (S, I); - /* Remember, we had changes */ + /* Remember, we had changes */ ++Changes; - } + } - /* Next entry */ - ++I; + /* Next entry */ + ++I; } - /* Free the register info */ - CS_FreeRegInfo (S); - /* Return the number of changes made */ return Changes; } -/*****************************************************************************/ -/* Optimize stores through pointers */ -/*****************************************************************************/ - - - -static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L) -/* Check if this is one of the allowed suboperation for OptPtrStore1 */ -{ - /* Check for a label attached to the entry */ - if (CE_HasLabel (L[0])) { - return 0; - } - - /* Check for single insn sub ops */ - if (L[0]->OPC == OP65_AND || - L[0]->OPC == OP65_EOR || - L[0]->OPC == OP65_ORA || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) || - (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) { - - /* One insn */ - return 1; - - } else if (L[0]->OPC == OP65_CLC && - (L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_ADC && - !CE_HasLabel (L[1])) { - return 2; - } else if (L[0]->OPC == OP65_SEC && - (L[1] = CS_GetNextEntry (S, I)) != 0 && - L[1]->OPC == OP65_SBC && - !CE_HasLabel (L[1])) { - return 2; - } - - - - /* Not found */ - return 0; -} - - - -static unsigned OptPtrStore1 (CodeSeg* S) -/* Search for the sequence: - * - * jsr pushax - * ldy xxx - * jsr ldauidx - * subop - * ldy yyy - * jsr staspidx - * - * and replace it by: - * - * sta ptr1 - * stx ptr1+1 - * ldy xxx - * ldx #$00 - * lda (ptr1),y - * subop - * ldy yyy - * sta (ptr1),y - * - * In case a/x is loaded from the register bank before the pushax, we can even - * use the register bank instead of ptr1. - */ -/* - * jsr pushax - * ldy xxx - * jsr ldauidx - * ldx #$00 - * lda (zp),y - * subop - * ldy yyy - * sta (zp),y - * jsr staspidx - */ +static unsigned OptLoad2 (CodeSeg* S) +/* Replace calls to ldaxysp by inline code */ { + unsigned I; unsigned Changes = 0; /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - unsigned K; - CodeEntry* L[10]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (CE_IsCallTo (L[0], "pushax") && - CS_GetEntries (S, L+1, I+1, 3) && - L[1]->OPC == OP65_LDY && - CE_KnownImm (L[1]) && - !CE_HasLabel (L[1]) && - CE_IsCallTo (L[2], "ldauidx") && - !CE_HasLabel (L[2]) && - (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 && - CS_GetEntries (S, L+3+K, I+3+K, 2) && - L[3+K]->OPC == OP65_LDY && - CE_KnownImm (L[3+K]) && - !CE_HasLabel (L[3+K]) && - CE_IsCallTo (L[4+K], "staspidx") && - !CE_HasLabel (L[4+K])) { - - - const char* RegBank = 0; - const char* ZPLoc = "ptr1"; - CodeEntry* X; - - - /* Get the preceeding two instructions and check them. We check - * for: - * lda regbank+n - * ldx regbank+n+1 - */ - if (I > 1) { - CodeEntry* P[2]; - P[0] = CS_GetEntry (S, I-2); - P[1] = CS_GetEntry (S, I-1); - if (P[0]->OPC == OP65_LDA && - P[0]->AM == AM65_ZP && - P[1]->OPC == OP65_LDX && - P[1]->AM == AM65_ZP && - !CE_HasLabel (P[1]) && - strncmp (P[0]->Arg, "regbank+", 8) == 0) { - - unsigned Len = strlen (P[0]->Arg); - - if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 && - P[1]->Arg[Len+0] == '+' && - P[1]->Arg[Len+1] == '1' && - P[1]->Arg[Len+2] == '\0') { - - /* Ok, found. Use the name of the register bank */ - RegBank = ZPLoc = P[0]->Arg; - } - } - } - - /* Insert the load via the zp pointer */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI); - CS_InsertEntry (S, X, I+4); - - /* Insert the store through the zp pointer */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI); - CS_InsertEntry (S, X, I+6+K); - - /* Delete the old code */ - CS_DelEntry (S, I+7+K); /* jsr spaspidx */ - CS_DelEntry (S, I+2); /* jsr ldauidx */ - - /* Create and insert the stores into the zp pointer if needed */ - if (RegBank == 0) { - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); - } - - /* Delete more old code. Do it here to keep a label attached to - * entry I in place. - */ - CS_DelEntry (S, I); /* jsr pushax */ + CodeEntry* L[3]; - /* Remember, we had changes */ - ++Changes; + /* Get next entry */ + L[0] = CS_GetEntry (S, I); - } + /* Check for the sequence */ + if (CE_IsCallTo (L[0], "ldaxysp")) { - /* Next entry */ - ++I; - - } + CodeEntry* X; - /* Return the number of changes made */ - return Changes; -} + /* Followed by sta abs/stx abs? */ + if (CS_GetEntries (S, L+1, I+1, 2) && + L[1]->OPC == OP65_STA && + L[2]->OPC == OP65_STX && + (L[1]->Arg == 0 || + L[2]->Arg == 0 || + strcmp (L[1]->Arg, L[2]->Arg) != 0) && + !CS_RangeHasLabel (S, I+1, 2) && + !RegXUsed (S, I+3)) { + /* A/X are stored into memory somewhere and X is not used + ** later + */ + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, L[0]->LI); + CS_InsertEntry (S, X, I+3); -static unsigned OptPtrStore2 (CodeSeg* S) -/* Search for the sequence: - * - * lda #<(label+0) - * ldx #>(label+0) - * clc - * adc xxx - * bcc L - * inx - * L: jsr pushax - * ldx #$00 - * lda yyy - * ldy #$00 - * jsr staspidx - * - * and replace it by: - * - * ldy xxx - * ldx #$00 - * lda yyy - * sta label,y - */ -{ - unsigned Changes = 0; + /* sta abs */ + X = NewCodeEntry (OP65_STA, L[2]->AM, L[2]->Arg, 0, L[2]->LI); + CS_InsertEntry (S, X, I+4); - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[0]->LI); + CS_InsertEntry (S, X, I+5); - CodeEntry* L[11]; - unsigned Len; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && - L[0]->AM == AM65_IMM && - CS_GetEntries (S, L+1, I+1, 10) && - L[1]->OPC == OP65_LDX && - L[1]->AM == AM65_IMM && - L[2]->OPC == OP65_CLC && - L[3]->OPC == OP65_ADC && - (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && - (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) && - L[4]->JumpTo != 0 && - L[4]->JumpTo->Owner == L[6] && - L[5]->OPC == OP65_INX && - CE_IsCallTo (L[6], "pushax") && - L[7]->OPC == OP65_LDX && - L[8]->OPC == OP65_LDA && - L[9]->OPC == OP65_LDY && - CE_KnownImm (L[9]) && - L[9]->Num == 0 && - CE_IsCallTo (L[10], "staspidx") && - !CS_RangeHasLabel (S, I+1, 5) && - !CS_RangeHasLabel (S, I+7, 4) && - /* Check the label last because this is quite costly */ - (Len = strlen (L[0]->Arg)) > 3 && - L[0]->Arg[0] == '<' && - L[0]->Arg[1] == '(' && - strlen (L[1]->Arg) == Len && - L[1]->Arg[0] == '>' && - memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { - - CodeEntry* X; - char* Label; - - /* We will create all the new stuff behind the current one so - * we keep the line references. - */ - X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+11); - - X = NewCodeEntry (OP65_LDX, L[7]->AM, L[7]->Arg, 0, L[7]->LI); - CS_InsertEntry (S, X, I+12); - - X = NewCodeEntry (OP65_LDA, L[8]->AM, L[8]->Arg, 0, L[8]->LI); - CS_InsertEntry (S, X, I+13); - - Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); - Label[Len-3] = '\0'; - X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[10]->LI); - CS_InsertEntry (S, X, I+14); - xfree (Label); - - /* Remove the old code */ - CS_DelEntries (S, I, 11); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, L[0]->LI); + CS_InsertEntry (S, X, I+6); - } + /* sta abs */ + X = NewCodeEntry (OP65_STA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I+7); - /* Return the number of changes made */ - return Changes; -} + /* Now remove the call to the subroutine and the sta/stx */ + CS_DelEntries (S, I, 3); + } else { + /* Standard replacement */ -/*****************************************************************************/ -/* Optimize loads through pointers */ -/*****************************************************************************/ + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, L[0]->LI); + CS_InsertEntry (S, X, I+1); + /* tax */ + X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[0]->LI); + CS_InsertEntry (S, X, I+2); + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[0]->LI); + CS_InsertEntry (S, X, I+3); -static unsigned OptPtrLoad1 (CodeSeg* S) -/* Search for the sequence: - * - * clc - * adc xxx - * tay - * txa - * adc yyy - * tax - * tya - * ldy - * jsr ldauidx - * - * and replace it by: - * - * clc - * adc xxx - * sta ptr1 - * txa - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 - * lda (ptr1),y - */ -{ - unsigned Changes = 0; + /* lda (sp),y */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp", 0, L[0]->LI); + CS_InsertEntry (S, X, I+4); - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { + /* Now remove the call to the subroutine */ + CS_DelEntry (S, I); - CodeEntry* L[9]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_CLC && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_ADC && - L[2]->OPC == OP65_TAY && - L[3]->OPC == OP65_TXA && - L[4]->OPC == OP65_ADC && - L[5]->OPC == OP65_TAX && - L[6]->OPC == OP65_TYA && - L[7]->OPC == OP65_LDY && - CE_IsCallTo (L[8], "ldauidx") && - !CS_RangeHasLabel (S, I+1, 8)) { - - CodeEntry* X; - CodeEntry* P; - - /* Track the insertion point */ - unsigned IP = I+2; - - /* sta ptr1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[2]->LI); - CS_InsertEntry (S, X, IP++); - - /* If the instruction before the clc is a ldx, replace the - * txa by an lda with the same location of the ldx. Otherwise - * transfer the value in X to A. - */ - if ((P = CS_GetPrevEntry (S, I)) != 0 && - P->OPC == OP65_LDX && - !CE_HasLabel (P)) { - X = NewCodeEntry (OP65_LDA, P->AM, P->Arg, 0, P->LI); - } else { - X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, L[3]->LI); } - CS_InsertEntry (S, X, IP++); - - /* adc yyy */ - X = NewCodeEntry (OP65_ADC, L[4]->AM, L[4]->Arg, 0, L[4]->LI); - CS_InsertEntry (S, X, IP++); - /* sta ptr1+1 */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[5]->LI); - CS_InsertEntry (S, X, IP++); - - /* ldy ... */ - X = NewCodeEntry (OP65_LDY, L[7]->AM, L[7]->Arg, 0, L[7]->LI); - CS_InsertEntry (S, X, IP++); - - /* ldx #$00 */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[8]->LI); - CS_InsertEntry (S, X, IP++); - - /* lda (ptr1),y */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[8]->LI); - CS_InsertEntry (S, X, IP++); - - /* Remove the old instructions */ - CS_DelEntries (S, IP, 7); - - /* Remember, we had changes */ - ++Changes; - - } + /* Remember, we had changes */ + ++Changes; - /* Next entry */ - ++I; + } + /* Next entry */ + ++I; } /* Return the number of changes made */ @@ -804,181 +227,62 @@ static unsigned OptPtrLoad1 (CodeSeg* S) -static unsigned OptPtrLoad2 (CodeSeg* S) -/* Search for the sequence: - * - * adc xxx - * pha - * txa - * iny - * adc yyy - * tax - * pla - * ldy - * jsr ldauidx - * - * and replace it by: - * - * adc xxx - * sta ptr1 - * txa - * iny - * adc yyy - * sta ptr1+1 - * ldy - * ldx #$00 - * lda (ptr1),y - */ +static unsigned OptLoad3 (CodeSeg* S) +/* Remove repeated loads from one and the same memory location */ { unsigned Changes = 0; + CodeEntry* Load = 0; /* Walk over the entries */ unsigned I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[9]; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_ADC && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_PHA && - L[2]->OPC == OP65_TXA && - L[3]->OPC == OP65_INY && - L[4]->OPC == OP65_ADC && - L[5]->OPC == OP65_TAX && - L[6]->OPC == OP65_PLA && - L[7]->OPC == OP65_LDY && - CE_IsCallTo (L[8], "ldauidx") && - !CS_RangeHasLabel (S, I+1, 8)) { - - CodeEntry* X; - - /* Store the low byte and remove the PHA instead */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); + /* Get next entry */ + CodeEntry* E = CS_GetEntry (S, I); - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI); - CS_InsertEntry (S, X, I+6); - - /* Load high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI); - CS_InsertEntry (S, X, I+10); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI); - CS_InsertEntry (S, X, I+11); - - /* Delete the old code */ - CS_DelEntry (S, I+12); /* jsr ldauidx */ - CS_DelEntry (S, I+8); /* pla */ - CS_DelEntry (S, I+7); /* tax */ - CS_DelEntry (S, I+2); /* pha */ + /* Forget a preceeding load if we have a label */ + if (Load && CE_HasLabel (E)) { + Load = 0; + } - /* Remember, we had changes */ - ++Changes; + /* Check if this insn is a load */ + if (E->Info & OF_LOAD) { - } + CodeEntry* N; - /* Next entry */ - ++I; + /* If we had a preceeding load that is identical, remove this one. + ** If it is not identical, or we didn't have one, remember it. + */ + if (Load != 0 && + E->OPC == Load->OPC && + E->AM == Load->AM && + ((E->Arg == 0 && Load->Arg == 0) || + strcmp (E->Arg, Load->Arg) == 0) && + (N = CS_GetNextEntry (S, I)) != 0 && + (N->Info & OF_CBRA) == 0) { - } + /* Now remove the call to the subroutine */ + CS_DelEntry (S, I); - /* Return the number of changes made */ - return Changes; -} + /* Remember, we had changes */ + ++Changes; + /* Next insn */ + continue; + } else { -static unsigned OptPtrLoad3 (CodeSeg* S) -/* Search for the sequence: - * - * lda #<(label+0) - * ldx #>(label+0) - * clc - * adc xxx - * bcc L - * inx - * L: ldy #$00 - * jsr ldauidx - * - * and replace it by: - * - * ldy xxx - * ldx #$00 - * lda label,y - */ -{ - unsigned Changes = 0; + Load = E; - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { + } - CodeEntry* L[8]; - unsigned Len; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && - L[0]->AM == AM65_IMM && - CS_GetEntries (S, L+1, I+1, 7) && - L[1]->OPC == OP65_LDX && - L[1]->AM == AM65_IMM && - L[2]->OPC == OP65_CLC && - L[3]->OPC == OP65_ADC && - (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && - (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) && - L[4]->JumpTo != 0 && - L[4]->JumpTo->Owner == L[6] && - L[5]->OPC == OP65_INX && - L[6]->OPC == OP65_LDY && - CE_KnownImm (L[6]) && - L[6]->Num == 0 && - CE_IsCallTo (L[7], "ldauidx") && - !CS_RangeHasLabel (S, I+1, 5) && - !CE_HasLabel (L[7]) && - /* Check the label last because this is quite costly */ - (Len = strlen (L[0]->Arg)) > 3 && - L[0]->Arg[0] == '<' && - L[0]->Arg[1] == '(' && - strlen (L[1]->Arg) == Len && - L[1]->Arg[0] == '>' && - memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { - - CodeEntry* X; - char* Label; - - /* We will create all the new stuff behind the current one so - * we keep the line references. - */ - X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+8); - - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+9); - - Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); - Label[Len-3] = '\0'; - X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); - CS_InsertEntry (S, X, I+10); - xfree (Label); - - /* Remove the old code */ - CS_DelEntries (S, I, 8); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; + } else if ((E->Info & OF_CMP) == 0 && (E->Info & OF_CBRA) == 0) { + /* Forget the first load on occurance of any insn we don't like */ + Load = 0; + } + /* Next entry */ + ++I; } /* Return the number of changes made */ @@ -987,321 +291,227 @@ static unsigned OptPtrLoad3 (CodeSeg* S) -static unsigned OptPtrLoad4 (CodeSeg* S) -/* Search for the sequence: - * - * lda #<(label+0) - * ldx #>(label+0) - * ldy #$xx - * clc - * adc (sp),y - * bcc L - * inx - * L: ldy #$00 - * jsr ldauidx - * - * and replace it by: - * - * ldy #$xx - * lda (sp),y - * tay - * ldx #$00 - * lda label,y - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[9]; - unsigned Len; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && - L[0]->AM == AM65_IMM && - CS_GetEntries (S, L+1, I+1, 8) && - L[1]->OPC == OP65_LDX && - L[1]->AM == AM65_IMM && - !CE_HasLabel (L[1]) && - L[2]->OPC == OP65_LDY && - CE_KnownImm (L[2]) && - !CE_HasLabel (L[2]) && - L[3]->OPC == OP65_CLC && - !CE_HasLabel (L[3]) && - L[4]->OPC == OP65_ADC && - L[4]->AM == AM65_ZP_INDY && - !CE_HasLabel (L[4]) && - (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) && - L[5]->JumpTo != 0 && - L[5]->JumpTo->Owner == L[7] && - !CE_HasLabel (L[5]) && - L[6]->OPC == OP65_INX && - !CE_HasLabel (L[6]) && - L[7]->OPC == OP65_LDY && - CE_KnownImm (L[7]) && - L[7]->Num == 0 && - CE_IsCallTo (L[8], "ldauidx") && - !CE_HasLabel (L[8]) && - /* Check the label last because this is quite costly */ - (Len = strlen (L[0]->Arg)) > 3 && - L[0]->Arg[0] == '<' && - L[0]->Arg[1] == '(' && - strlen (L[1]->Arg) == Len && - L[1]->Arg[0] == '>' && - memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) { - - CodeEntry* X; - char* Label; - - /* Add the lda */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[4]->Arg, 0, L[0]->LI); - CS_InsertEntry (S, X, I+3); - - /* Add the tay */ - X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[0]->LI); - CS_InsertEntry (S, X, I+4); - - /* Add the ldx */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+5); - - /* Add the lda */ - Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3); - Label[Len-3] = '\0'; - X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI); - CS_InsertEntry (S, X, I+6); - xfree (Label); - - /* Remove the old code */ - CS_DelEntries (S, I, 2); - CS_DelEntries (S, I+5, 6); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} +/*****************************************************************************/ +/* Decouple operations */ +/*****************************************************************************/ -static unsigned OptPtrLoad5 (CodeSeg* S) -/* Search for the sequence: - * - * lda regbank+n - * ldx regbank+n+1 - * sta regsave - * stx regsave+1 - * clc - * adc #$01 - * bcc L0005 - * inx - * L: sta regbank+n - * stx regbank+n+1 - * lda regsave - * ldx regsave+1 - * ldy #$00 - * jsr ldauidx - * - * and replace it by: - * - * ldy #$00 - * ldx #$00 - * lda (regbank+n),y - * inc regbank+n - * bne L1 - * inc regbank+n+1 - * L1: tay <- only if flags are used - * - * This function must execute before OptPtrLoad5! - * - */ +static unsigned OptDecouple (CodeSeg* S) +/* Decouple operations, that is, do the following replacements: +** +** dex -> ldx #imm +** inx -> ldx #imm +** dey -> ldy #imm +** iny -> ldy #imm +** tax -> ldx #imm +** txa -> lda #imm +** tay -> ldy #imm +** tya -> lda #imm +** lda zp -> lda #imm +** ldx zp -> ldx #imm +** ldy zp -> ldy #imm +** +** Provided that the register values are known of course. +*/ { unsigned Changes = 0; + unsigned I; /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[15]; - unsigned Len; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && - L[0]->AM == AM65_ZP && - strncmp (L[0]->Arg, "regbank+", 8) == 0 && - (Len = strlen (L[0]->Arg)) > 0 && - CS_GetEntries (S, L+1, I+1, 14) && - !CS_RangeHasLabel (S, I+1, 7) && - !CS_RangeHasLabel (S, I+9, 5) && - L[1]->OPC == OP65_LDX && - L[1]->AM == AM65_ZP && - strncmp (L[1]->Arg, L[0]->Arg, Len) == 0 && - strcmp (L[1]->Arg+Len, "+1") == 0 && - L[2]->OPC == OP65_STA && - L[2]->AM == AM65_ZP && - strcmp (L[2]->Arg, "regsave") == 0 && - L[3]->OPC == OP65_STX && - L[3]->AM == AM65_ZP && - strcmp (L[3]->Arg, "regsave+1") == 0 && - L[4]->OPC == OP65_CLC && - L[5]->OPC == OP65_ADC && - CE_KnownImm (L[5]) && - L[5]->Num == 1 && - L[6]->OPC == OP65_BCC && - L[6]->JumpTo != 0 && - L[6]->JumpTo->Owner == L[8] && - L[7]->OPC == OP65_INX && - L[8]->OPC == OP65_STA && - L[8]->AM == AM65_ZP && - strcmp (L[8]->Arg, L[0]->Arg) == 0 && - L[9]->OPC == OP65_STX && - L[9]->AM == AM65_ZP && - strcmp (L[9]->Arg, L[1]->Arg) == 0 && - L[10]->OPC == OP65_LDA && - L[10]->AM == AM65_ZP && - strcmp (L[10]->Arg, "regsave") == 0 && - L[11]->OPC == OP65_LDX && - L[11]->AM == AM65_ZP && - strcmp (L[11]->Arg, "regsave+1") == 0 && - L[12]->OPC == OP65_LDY && - CE_KnownImm (L[12]) && - CE_IsCallTo (L[13], "ldauidx")) { - - CodeEntry* X; - CodeLabel* Label; - - /* Check if the instruction following the sequence uses the flags - * set by the load. If so, insert a test of the value in the - * accumulator. - */ - if (CE_UseLoadFlags (L[14])) { - X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[13]->LI); - CS_InsertEntry (S, X, I+14); - } - - /* Attach a label to L[14]. This may be either the just inserted - * instruction, or the one following the sequence. - */ - Label = CS_GenLabel (S, L[14]); - - /* ldy #$xx */ - X = NewCodeEntry (OP65_LDY, AM65_IMM, L[12]->Arg, 0, L[12]->LI); - CS_InsertEntry (S, X, I+14); + const char* Arg; - /* ldx #$xx */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[13]->LI); - CS_InsertEntry (S, X, I+15); + /* Get next entry and it's input register values */ + CodeEntry* E = CS_GetEntry (S, I); + const RegContents* In = &E->RI->In; - /* lda (regbank+n),y */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[13]->LI); - CS_InsertEntry (S, X, I+16); + /* Assume we have no replacement */ + CodeEntry* X = 0; - /* inc regbank+n */ - X = NewCodeEntry (OP65_INC, AM65_ZP, L[0]->Arg, 0, L[5]->LI); - CS_InsertEntry (S, X, I+17); - - /* bne ... */ - X = NewCodeEntry (OP65_BNE, AM65_BRA, Label->Name, Label, L[6]->LI); - CS_InsertEntry (S, X, I+18); - - /* inc regbank+n+1 */ - X = NewCodeEntry (OP65_INC, AM65_ZP, L[1]->Arg, 0, L[7]->LI); - CS_InsertEntry (S, X, I+19); - - /* Delete the old code */ - CS_DelEntries (S, I, 14); - - /* Remember, we had changes */ - ++Changes; - - } - - /* Next entry */ - ++I; - - } - - /* Return the number of changes made */ - return Changes; -} + /* Check the instruction */ + switch (E->OPC) { + case OP65_DEA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA - 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; + case OP65_DEX: + if (RegValIsKnown (In->RegX)) { + Arg = MakeHexArg ((In->RegX - 1) & 0xFF); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; -static unsigned OptPtrLoad6 (CodeSeg* S) -/* Search for the sequence: - * - * lda zp - * ldx zp+1 - * ldy xx - * jsr ldauidx - * - * and replace it by: - * - * ldy xx - * ldx #$00 - * lda (zp),y - */ -{ - unsigned Changes = 0; + case OP65_DEY: + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY - 1) & 0xFF); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { + case OP65_INA: + if (RegValIsKnown (In->RegA)) { + Arg = MakeHexArg ((In->RegA + 1) & 0xFF); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; - CodeEntry* L[4]; - unsigned Len; + case OP65_INX: + if (RegValIsKnown (In->RegX)) { + Arg = MakeHexArg ((In->RegX + 1) & 0xFF); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; - /* Get next entry */ - L[0] = CS_GetEntry (S, I); + case OP65_INY: + if (RegValIsKnown (In->RegY)) { + Arg = MakeHexArg ((In->RegY + 1) & 0xFF); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; + + case OP65_LDA: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; + + case OP65_LDX: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use & REG_ZP, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; + + case OP65_LDY: + if (E->AM == AM65_ZP) { + switch (GetKnownReg (E->Use, In)) { + case REG_TMP1: + Arg = MakeHexArg (In->Tmp1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_LO: + Arg = MakeHexArg (In->Ptr1Lo); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_PTR1_HI: + Arg = MakeHexArg (In->Ptr1Hi); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_LO: + Arg = MakeHexArg (In->SRegLo); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + + case REG_SREG_HI: + Arg = MakeHexArg (In->SRegHi); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + break; + } + } + break; - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_ZP && - CS_GetEntries (S, L+1, I+1, 3) && - !CS_RangeHasLabel (S, I+1, 3) && - L[1]->OPC == OP65_LDX && L[1]->AM == AM65_ZP && - (Len = strlen (L[0]->Arg)) > 0 && - strncmp (L[0]->Arg, L[1]->Arg, Len) == 0 && - strcmp (L[1]->Arg + Len, "+1") == 0 && - L[2]->OPC == OP65_LDY && - CE_IsCallTo (L[3], "ldauidx")) { + case OP65_TAX: + if (E->RI->In.RegA >= 0) { + Arg = MakeHexArg (In->RegA); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); + } + break; - CodeEntry* X; + case OP65_TAY: + if (E->RI->In.RegA >= 0) { + Arg = MakeHexArg (In->RegA); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + } + break; - /* ldx #$00 */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI); - CS_InsertEntry (S, X, I+3); + case OP65_TXA: + if (E->RI->In.RegX >= 0) { + Arg = MakeHexArg (In->RegX); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; - /* lda (zp),y */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+4); + case OP65_TYA: + if (E->RI->In.RegY >= 0) { + Arg = MakeHexArg (In->RegY); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); + } + break; - /* Remove the old code */ - CS_DelEntry (S, I+5); - CS_DelEntries (S, I, 2); + default: + /* Avoid gcc warnings */ + break; - /* Remember, we had changes */ - ++Changes; + } - } + /* Insert the replacement if we have one */ + if (X) { + CS_InsertEntry (S, X, I+1); + CS_DelEntry (S, I); + ++Changes; + } - /* Next entry */ - ++I; + /* Next entry */ + ++I; } @@ -1311,144 +521,91 @@ static unsigned OptPtrLoad6 (CodeSeg* S) -static unsigned OptPtrLoad7 (CodeSeg* S) -/* Search for the sequence: - * - * lda zp - * ldx zp+1 - * ldy xx - * jsr ldaxidx - * - * and replace it by: - * - * ldy xx - * lda (zp),y - * tax - * dey - * lda (zp),y - */ -{ - unsigned Changes = 0; - - /* Walk over the entries */ - unsigned I = 0; - while (I < CS_GetEntryCount (S)) { - - CodeEntry* L[4]; - unsigned Len; - - /* Get next entry */ - L[0] = CS_GetEntry (S, I); - - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDA && L[0]->AM == AM65_ZP && - CS_GetEntries (S, L+1, I+1, 3) && - !CS_RangeHasLabel (S, I+1, 3) && - L[1]->OPC == OP65_LDX && L[1]->AM == AM65_ZP && - (Len = strlen (L[0]->Arg)) > 0 && - strncmp (L[0]->Arg, L[1]->Arg, Len) == 0 && - strcmp (L[1]->Arg + Len, "+1") == 0 && - L[2]->OPC == OP65_LDY && - CE_IsCallTo (L[3], "ldaxidx")) { - - CodeEntry* X; - - /* lda (zp),y */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+4); - - /* tax */ - X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[3]->LI); - CS_InsertEntry (S, X, I+5); - - /* dey */ - X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[3]->LI); - CS_InsertEntry (S, X, I+6); - - /* lda (zp),y */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[0]->Arg, 0, L[3]->LI); - CS_InsertEntry (S, X, I+7); - - /* Remove the old code */ - CS_DelEntry (S, I+3); - CS_DelEntries (S, I, 2); - - /* Remember, we had changes */ - ++Changes; +/*****************************************************************************/ +/* Optimize stack pointer ops */ +/*****************************************************************************/ - } - /* Next entry */ - ++I; +static unsigned IsDecSP (const CodeEntry* E) +/* Check if this is an insn that decrements the stack pointer. If so, return +** the decrement. If not, return zero. +** The function expects E to be a subroutine call. +*/ +{ + if (strncmp (E->Arg, "decsp", 5) == 0) { + if (E->Arg[5] >= '1' && E->Arg[5] <= '8') { + return (E->Arg[5] - '0'); + } + } else if (strcmp (E->Arg, "subysp") == 0 && RegValIsKnown (E->RI->In.RegY)) { + return E->RI->In.RegY; } - /* Return the number of changes made */ - return Changes; + /* If we come here, it's not a decsp op */ + return 0; } -static unsigned OptPtrLoad8 (CodeSeg* S) -/* Search for the sequence - * - * ldy ... - * jsr ldauidx - * - * and replace it by: - * - * ldy ... - * stx ptr1+1 - * sta ptr1 - * ldx #$00 - * lda (ptr1),y - * - * This step must be executed *after* OptPtrLoad1! - */ +static unsigned OptStackPtrOps (CodeSeg* S) +/* Merge adjacent calls to decsp into one. NOTE: This function won't merge all +** known cases! +*/ { unsigned Changes = 0; + unsigned I; /* Walk over the entries */ - unsigned I = 0; + I = 0; while (I < CS_GetEntryCount (S)) { - CodeEntry* L[2]; + unsigned Dec1; + unsigned Dec2; + const CodeEntry* N; - /* Get next entry */ - L[0] = CS_GetEntry (S, I); + /* Get the next entry */ + const CodeEntry* E = CS_GetEntry (S, I); - /* Check for the sequence */ - if (L[0]->OPC == OP65_LDY && - CS_GetEntries (S, L+1, I+1, 1) && - CE_IsCallTo (L[1], "ldauidx") && - !CE_HasLabel (L[1])) { + /* Check for decspn or subysp */ + if (E->OPC == OP65_JSR && + (Dec1 = IsDecSP (E)) > 0 && + (N = CS_GetNextEntry (S, I)) != 0 && + (Dec2 = IsDecSP (N)) > 0 && + (Dec1 += Dec2) <= 255 && + !CE_HasLabel (N)) { - CodeEntry* X; - - /* Store the high byte */ - X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+1); + CodeEntry* X; + char Buf[20]; - /* Store the low byte */ - X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+2); + /* We can combine the two */ + if (Dec1 <= 8) { + /* Insert a call to decsp */ + xsprintf (Buf, sizeof (Buf), "decsp%u", Dec1); + X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, N->LI); + CS_InsertEntry (S, X, I+2); + } else { + /* Insert a call to subysp */ + const char* Arg = MakeHexArg (Dec1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, N->LI); + CS_InsertEntry (S, X, I+2); + X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp", 0, N->LI); + CS_InsertEntry (S, X, I+3); + } - /* Delete the call to ldauidx */ - CS_DelEntry (S, I+3); + /* Delete the old code */ + CS_DelEntries (S, I, 2); - /* Load the high and low byte */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI); - CS_InsertEntry (S, X, I+3); - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[0]->LI); - CS_InsertEntry (S, X, I+4); + /* Regenerate register info */ + CS_GenRegInfo (S); - /* Remember, we had changes */ - ++Changes; + /* Remember we had changes */ + ++Changes; - } + } else { - /* Next entry */ - ++I; + /* Next entry */ + ++I; + } } @@ -1456,246 +613,119 @@ static unsigned OptPtrLoad8 (CodeSeg* S) return Changes; } - - -/*****************************************************************************/ -/* Decouple operations */ -/*****************************************************************************/ - - - -static unsigned OptDecouple (CodeSeg* S) -/* Decouple operations, that is, do the following replacements: - * - * dex -> ldx #imm - * inx -> ldx #imm - * dey -> ldy #imm - * iny -> ldy #imm - * tax -> ldx #imm - * txa -> lda #imm - * tay -> ldy #imm - * tya -> lda #imm - * lda zp -> lda #imm - * ldx zp -> ldx #imm - * ldy zp -> ldy #imm - * - * Provided that the register values are known of course. - */ +static unsigned OptGotoSPAdj (CodeSeg* S) +/* Optimize SP adjustment for forward 'goto' */ { unsigned Changes = 0; unsigned I; - /* Generate register info for the following step */ - CS_GenRegInfo (S); - /* Walk over the entries */ I = 0; while (I < CS_GetEntryCount (S)) { - const char* Arg; - - /* Get next entry and it's input register values */ - CodeEntry* E = CS_GetEntry (S, I); - const RegContents* In = &E->RI->In; - - /* Assume we have no replacement */ - CodeEntry* X = 0; - - /* Check the instruction */ - switch (E->OPC) { - - case OP65_DEA: - if (RegValIsKnown (In->RegA)) { - Arg = MakeHexArg ((In->RegA - 1) & 0xFF); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_DEX: - if (RegValIsKnown (In->RegX)) { - Arg = MakeHexArg ((In->RegX - 1) & 0xFF); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_DEY: - if (RegValIsKnown (In->RegY)) { - Arg = MakeHexArg ((In->RegY - 1) & 0xFF); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_INA: - if (RegValIsKnown (In->RegA)) { - Arg = MakeHexArg ((In->RegA + 1) & 0xFF); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_INX: - if (RegValIsKnown (In->RegX)) { - Arg = MakeHexArg ((In->RegX + 1) & 0xFF); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_INY: - if (RegValIsKnown (In->RegY)) { - Arg = MakeHexArg ((In->RegY + 1) & 0xFF); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_LDA: - if (E->AM == AM65_ZP) { - switch (GetKnownReg (E->Use & REG_ZP, In)) { - case REG_TMP1: - Arg = MakeHexArg (In->Tmp1); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_LO: - Arg = MakeHexArg (In->Ptr1Lo); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_HI: - Arg = MakeHexArg (In->Ptr1Hi); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_LO: - Arg = MakeHexArg (In->SRegLo); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_HI: - Arg = MakeHexArg (In->SRegHi); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - break; - } - } - break; - - case OP65_LDX: - if (E->AM == AM65_ZP) { - switch (GetKnownReg (E->Use & REG_ZP, In)) { - case REG_TMP1: - Arg = MakeHexArg (In->Tmp1); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_LO: - Arg = MakeHexArg (In->Ptr1Lo); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_HI: - Arg = MakeHexArg (In->Ptr1Hi); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_LO: - Arg = MakeHexArg (In->SRegLo); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_HI: - Arg = MakeHexArg (In->SRegHi); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - break; - } - } - break; - - case OP65_LDY: - if (E->AM == AM65_ZP) { - switch (GetKnownReg (E->Use, In)) { - case REG_TMP1: - Arg = MakeHexArg (In->Tmp1); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_LO: - Arg = MakeHexArg (In->Ptr1Lo); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_PTR1_HI: - Arg = MakeHexArg (In->Ptr1Hi); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_LO: - Arg = MakeHexArg (In->SRegLo); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - break; - - case REG_SREG_HI: - Arg = MakeHexArg (In->SRegHi); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - break; - } - } - break; - - case OP65_TAX: - if (E->RI->In.RegA >= 0) { - Arg = MakeHexArg (In->RegA); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_TAY: - if (E->RI->In.RegA >= 0) { - Arg = MakeHexArg (In->RegA); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_TXA: - if (E->RI->In.RegX >= 0) { - Arg = MakeHexArg (In->RegX); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - } - break; - - case OP65_TYA: - if (E->RI->In.RegY >= 0) { - Arg = MakeHexArg (In->RegY); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); - } - break; - - default: - /* Avoid gcc warnings */ - break; - - } - - /* Insert the replacement if we have one */ - if (X) { - CS_InsertEntry (S, X, I+1); - CS_DelEntry (S, I); - ++Changes; - } - - /* Next entry */ - ++I; + CodeEntry* L[10], *X; + unsigned short adjustment; + const char* Arg; + + /* Get next entry */ + L[0] = CS_GetEntry (S, I); + + /* Check for the sequence generated by g_lateadjustSP */ + if (L[0]->OPC == OP65_PHA && + CS_GetEntries (S, L+1, I+1, 9) && + L[1]->OPC == OP65_LDA && + L[1]->AM == AM65_ABS && + L[2]->OPC == OP65_CLC && + L[3]->OPC == OP65_ADC && + strcmp (L[3]->Arg, "sp") == 0 && + L[6]->OPC == OP65_ADC && + strcmp (L[6]->Arg, "sp+1") == 0 && + L[9]->OPC == OP65_JMP) { + adjustment = FindSPAdjustment (L[1]->Arg); + + if (adjustment == 0) { + /* No SP adjustment needed, remove the whole sequence */ + CS_DelEntries (S, I, 9); + } + else if (adjustment >= 65536 - 8) { + /* If adjustment is in range [-8, 0) we use decsp* calls */ + char Buf[20]; + adjustment = 65536 - adjustment; + xsprintf (Buf, sizeof (Buf), "decsp%u", adjustment); + X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, L[1]->LI); + CS_InsertEntry (S, X, I + 9); + + /* Delete the old code */ + CS_DelEntries (S, I, 9); + } + else if (adjustment >= 65536 - 255) { + /* For range [-255, -8) we have ldy #, jsr subysp */ + adjustment = 65536 - adjustment; + Arg = MakeHexArg (adjustment); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I + 9); + X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp", 0, L[1]->LI); + CS_InsertEntry (S, X, I + 10); + + /* Delete the old code */ + CS_DelEntries (S, I, 9); + } + else if (adjustment > 255) { + /* For ranges [-32768, 255) and (255, 32767) the only modification + ** is to replace the absolute with immediate addressing + */ + Arg = MakeHexArg (adjustment & 0xff); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I + 1); + Arg = MakeHexArg (adjustment >> 8); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, L[5]->LI); + CS_InsertEntry (S, X, I + 6); + + /* Delete the old code */ + CS_DelEntry (S, I + 2); + CS_DelEntry (S, I + 6); + } + else if (adjustment > 8) { + /* For range (8, 255] we have ldy #, jsr addysp */ + Arg = MakeHexArg (adjustment & 0xff); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI); + CS_InsertEntry (S, X, I + 9); + X = NewCodeEntry (OP65_JSR, AM65_ABS, "addysp", 0, L[1]->LI); + CS_InsertEntry (S, X, I + 10); + + /* Delete the old code */ + CS_DelEntries (S, I, 9); + } + else { + /* If adjustment is in range (0, 8] we use incsp* calls */ + char Buf[20]; + xsprintf (Buf, sizeof (Buf), "incsp%u", adjustment); + X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, L[1]->LI); + CS_InsertEntry (S, X, I + 9); + + /* Delete the old code */ + CS_DelEntries (S, I, 9); + } + /* Regenerate register info */ + CS_GenRegInfo (S); + + /* Remember we had changes */ + Changes++; - } + } else { - /* Free register info */ - CS_FreeRegInfo (S); + /* Next entry */ + ++I; + } + + } /* Return the number of changes made */ return Changes; } - - /*****************************************************************************/ -/* struct OptFunc */ +/* struct OptFunc */ /*****************************************************************************/ @@ -1705,7 +735,7 @@ struct OptFunc { unsigned (*Func) (CodeSeg*); /* Optimizer function */ const char* Name; /* Name of the function/group */ unsigned CodeSizeFactor; /* Code size factor for this opt func */ - unsigned long TotalRuns; /* Total number of runs */ + unsigned long TotalRuns; /* Total number of runs */ unsigned long LastRuns; /* Last number of runs */ unsigned long TotalChanges; /* Total number of changes */ unsigned long LastChanges; /* Last number of changes */ @@ -1715,79 +745,110 @@ struct OptFunc { /*****************************************************************************/ -/* Code */ +/* Code */ /*****************************************************************************/ /* A list of all the function descriptions */ static OptFunc DOpt65C02BitOps = { Opt65C02BitOps, "Opt65C02BitOps", 66, 0, 0, 0, 0, 0 }; -static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind", 100, 0, 0, 0, 0, 0 }; static OptFunc DOpt65C02Stores = { Opt65C02Stores, "Opt65C02Stores", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 125, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2", 200, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 90, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd4 = { OptAdd4, "OptAdd4", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptAdd5 = { OptAdd5, "OptAdd5", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 125, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2", 200, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd4 = { OptAdd4, "OptAdd4", 90, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd5 = { OptAdd5, "OptAdd5", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptAdd6 = { OptAdd6, "OptAdd6", 40, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegA1 = { OptBNegA1, "OptBNegA1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegA2 = { OptBNegA2, "OptBNegA2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegAX1 = { OptBNegAX1, "OptBNegAX1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegAX2 = { OptBNegAX2, "OptBNegAX2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegAX3 = { OptBNegAX3, "OptBNegAX3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBNegAX4 = { OptBNegAX4, "OptBNegAX4", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptBoolTrans = { OptBoolTrans, "OptBoolTrans", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 42, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 85, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3", 75, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 75, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 85, 0, 0, 0, 0, 0 }; -static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches", 80, 0, 0, 0, 0, 0 }; -static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 42, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3", 75, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 75, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptCmp9 = { OptCmp9, "OptCmp9", 85, 0, 0, 0, 0, 0 }; +static OptFunc DOptComplAX1 = { OptComplAX1, "OptComplAX1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches1= { OptCondBranches1,"OptCondBranches1", 80, 0, 0, 0, 0, 0 }; +static OptFunc DOptCondBranches2= { OptCondBranches2,"OptCondBranches2", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDecouple = { OptDecouple, "OptDecouple", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptJumpTarget = { OptJumpTarget, "OptJumpTarget", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptGotoSPAdj = { OptGotoSPAdj, "OptGotoSPAdj", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads1 = { OptIndLoads1, "OptIndLoads1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptIndLoads2 = { OptIndLoads2, "OptIndLoads2", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget1 = { OptJumpTarget1, "OptJumpTarget1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget2 = { OptJumpTarget2, "OptJumpTarget2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptJumpTarget3 = { OptJumpTarget3, "OptJumpTarget3", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptLoad1 = { OptLoad1, "OptLoad1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegA1 = { OptNegA1, "OptNegA1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegA2 = { OptNegA2, "OptNegA2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegAX3 = { OptNegAX3, "OptNegAX3", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptNegAX4 = { OptNegAX4, "OptNegAX4", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPrecalc = { OptPrecalc, "OptPrecalc", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 65, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 86, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrLoad8 = { OptPtrLoad8, "OptPtrLoad8", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 40, 0, 0, 0, 0, 0 }; -static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 }; -static OptFunc DOptPush2 = { OptPush2, "OptPush2", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad2 = { OptLoad2, "OptLoad2", 200, 0, 0, 0, 0, 0 }; +static OptFunc DOptLoad3 = { OptLoad3, "OptLoad3", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1", 165, 0, 0, 0, 0, 0 }; +static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2", 200, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPrecalc = { OptPrecalc, "OptPrecalc", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7", 140, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad11 = { OptPtrLoad11, "OptPtrLoad11", 92, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad12 = { OptPtrLoad12, "OptPtrLoad12", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad13 = { OptPtrLoad13, "OptPtrLoad13", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad14 = { OptPtrLoad14, "OptPtrLoad14", 108, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad15 = { OptPtrLoad15, "OptPtrLoad15", 86, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad16 = { OptPtrLoad16, "OptPtrLoad16", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad17 = { OptPtrLoad17, "OptPtrLoad17", 190, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad18 = { OptPtrLoad18, "OptPtrLoad18", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrLoad19 = { OptPtrLoad19, "OptPtrLoad19", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPtrStore3 = { OptPtrStore3, "OptPtrStore3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptPush2 = { OptPush2, "OptPush2", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptPushPop = { OptPushPop, "OptPushPop", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptShift3 = { OptShift3, "OptShift3", 110, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift3 = { OptShift3, "OptShift3", 17, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift4 = { OptShift4, "OptShift4", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift5 = { OptShift5, "OptShift5", 110, 0, 0, 0, 0, 0 }; +static OptFunc DOptShift6 = { OptShift6, "OptShift6", 200, 0, 0, 0, 0, 0 }; static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 }; static OptFunc DOptSize2 = { OptSize2, "OptSize2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStackPtrOps = { OptStackPtrOps, "OptStackPtrOps", 50, 0, 0, 0, 0, 0 }; static OptFunc DOptStore1 = { OptStore1, "OptStore1", 70, 0, 0, 0, 0, 0 }; -static OptFunc DOptStore2 = { OptStore2, "OptStore2", 220, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore2 = { OptStore2, "OptStore2", 115, 0, 0, 0, 0, 0 }; static OptFunc DOptStore3 = { OptStore3, "OptStore3", 120, 0, 0, 0, 0, 0 }; static OptFunc DOptStore4 = { OptStore4, "OptStore4", 50, 0, 0, 0, 0, 0 }; -static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptTest1 = { OptTest1, "OptTest1", 100, 0, 0, 0, 0, 0 }; -static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2", 60, 0, 0, 0, 0, 0 }; -static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads", 0, 0, 0, 0, 0, 0 }; -static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptStore5 = { OptStore5, "OptStore5", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptSub3 = { OptSub3, "OptSub3", 100, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest1 = { OptTest1, "OptTest1", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptTest2 = { OptTest2, "OptTest2", 50, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2", 60, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers3 = { OptTransfers3, "OptTransfers3", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptTransfers4 = { OptTransfers4, "OptTransfers4", 65, 0, 0, 0, 0, 0 }; +static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads", 0, 0, 0, 0, 0, 0 }; +static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores", 0, 0, 0, 0, 0, 0 }; /* Table containing all the steps in alphabetical order */ @@ -1800,6 +861,13 @@ static OptFunc* OptFuncs[] = { &DOptAdd3, &DOptAdd4, &DOptAdd5, + &DOptAdd6, + &DOptBNegA1, + &DOptBNegA2, + &DOptBNegAX1, + &DOptBNegAX2, + &DOptBNegAX3, + &DOptBNegAX4, &DOptBoolTrans, &DOptBranchDist, &DOptCmp1, @@ -1810,31 +878,46 @@ static OptFunc* OptFuncs[] = { &DOptCmp6, &DOptCmp7, &DOptCmp8, - &DOptCondBranches, + &DOptCmp9, + &DOptComplAX1, + &DOptCondBranches1, + &DOptCondBranches2, &DOptDeadCode, &DOptDeadJumps, &DOptDecouple, &DOptDupLoads, + &DOptGotoSPAdj, + &DOptIndLoads1, + &DOptIndLoads2, &DOptJumpCascades, - &DOptJumpTarget, + &DOptJumpTarget1, + &DOptJumpTarget2, + &DOptJumpTarget3, &DOptLoad1, - &DOptNegA1, - &DOptNegA2, + &DOptLoad2, + &DOptLoad3, &DOptNegAX1, &DOptNegAX2, - &DOptNegAX3, - &DOptNegAX4, &DOptPrecalc, &DOptPtrLoad1, + &DOptPtrLoad11, + &DOptPtrLoad12, + &DOptPtrLoad13, + &DOptPtrLoad14, + &DOptPtrLoad15, + &DOptPtrLoad16, + &DOptPtrLoad17, + &DOptPtrLoad18, + &DOptPtrLoad19, &DOptPtrLoad2, &DOptPtrLoad3, &DOptPtrLoad4, &DOptPtrLoad5, &DOptPtrLoad6, &DOptPtrLoad7, - &DOptPtrLoad8, &DOptPtrStore1, &DOptPtrStore2, + &DOptPtrStore3, &DOptPush1, &DOptPush2, &DOptPushPop, @@ -1844,19 +927,28 @@ static OptFunc* OptFuncs[] = { &DOptShift1, &DOptShift2, &DOptShift3, + &DOptShift4, + &DOptShift5, + &DOptShift6, &DOptSize1, &DOptSize2, &DOptStackOps, + &DOptStackPtrOps, &DOptStore1, &DOptStore2, &DOptStore3, &DOptStore4, + &DOptStore5, &DOptStoreLoad, &DOptSub1, &DOptSub2, + &DOptSub3, &DOptTest1, + &DOptTest2, &DOptTransfers1, &DOptTransfers2, + &DOptTransfers3, + &DOptTransfers4, &DOptUnusedLoads, &DOptUnusedStores, }; @@ -1874,8 +966,8 @@ static int CmpOptStep (const void* Key, const void* Func) static OptFunc* FindOptFunc (const char* Name) /* Find an optimizer step by name in the table and return a pointer. Return - * NULL if no such step is found. - */ +** NULL if no such step is found. +*/ { /* Search for the function in the list */ OptFunc** O = bsearch (Name, OptFuncs, OPTFUNC_COUNT, sizeof (OptFuncs[0]), CmpOptStep); @@ -1886,14 +978,14 @@ static OptFunc* FindOptFunc (const char* Name) static OptFunc* GetOptFunc (const char* Name) /* Find an optimizer step by name in the table and return a pointer. Print an - * error and call AbEnd if not found. - */ +** error and call AbEnd if not found. +*/ { /* Search for the function in the list */ OptFunc* F = FindOptFunc (Name); if (F == 0) { - /* Not found */ - AbEnd ("Optimization step `%s' not found", Name); + /* Not found */ + AbEnd ("Optimization step '%s' not found", Name); } return F; } @@ -1904,12 +996,12 @@ void DisableOpt (const char* Name) /* Disable the optimization with the given name */ { if (strcmp (Name, "any") == 0) { - unsigned I; - for (I = 0; I < OPTFUNC_COUNT; ++I) { - OptFuncs[I]->Disabled = 1; - } + unsigned I; + for (I = 0; I < OPTFUNC_COUNT; ++I) { + OptFuncs[I]->Disabled = 1; + } } else { - GetOptFunc(Name)->Disabled = 1; + GetOptFunc(Name)->Disabled = 1; } } @@ -1919,12 +1011,12 @@ void EnableOpt (const char* Name) /* Enable the optimization with the given name */ { if (strcmp (Name, "any") == 0) { - unsigned I; - for (I = 0; I < OPTFUNC_COUNT; ++I) { - OptFuncs[I]->Disabled = 0; - } + unsigned I; + for (I = 0; I < OPTFUNC_COUNT; ++I) { + OptFuncs[I]->Disabled = 0; + } } else { - GetOptFunc(Name)->Disabled = 0; + GetOptFunc(Name)->Disabled = 0; } } @@ -1934,8 +1026,10 @@ void ListOptSteps (FILE* F) /* List all optimization steps */ { unsigned I; + + fprintf (F, "any\n"); for (I = 0; I < OPTFUNC_COUNT; ++I) { - fprintf (F, "%s\n", OptFuncs[I]->Name); + fprintf (F, "%s\n", OptFuncs[I]->Name); } } @@ -1950,60 +1044,60 @@ static void ReadOptStats (const char* Name) /* Try to open the file */ FILE* F = fopen (Name, "r"); if (F == 0) { - /* Ignore the error */ - return; + /* Ignore the error */ + return; } /* Read and parse the lines */ Lines = 0; while (fgets (Buf, sizeof (Buf), F) != 0) { - char* B; - unsigned Len; - OptFunc* Func; - - /* Fields */ - char Name[32]; - unsigned long TotalRuns; - unsigned long TotalChanges; - - /* Count lines */ - ++Lines; - - /* Remove trailing white space including the line terminator */ - B = Buf; - Len = strlen (B); - while (Len > 0 && IsSpace (B[Len-1])) { - --Len; - } - B[Len] = '\0'; - - /* Remove leading whitespace */ - while (IsSpace (*B)) { - ++B; - } - - /* Check for empty and comment lines */ - if (*B == '\0' || *B == ';' || *B == '#') { - continue; - } - - /* Parse the line */ - if (sscanf (B, "%31s %lu %*u %lu %*u", Name, &TotalRuns, &TotalChanges) != 3) { - /* Syntax error */ - continue; - } - - /* Search for the optimizer step. */ - Func = FindOptFunc (Name); - if (Func == 0) { - /* Not found */ - continue; - } - - /* Found the step, set the fields */ - Func->TotalRuns = TotalRuns; - Func->TotalChanges = TotalChanges; + char* B; + unsigned Len; + OptFunc* Func; + + /* Fields */ + char Name[32]; + unsigned long TotalRuns; + unsigned long TotalChanges; + + /* Count lines */ + ++Lines; + + /* Remove trailing white space including the line terminator */ + B = Buf; + Len = strlen (B); + while (Len > 0 && IsSpace (B[Len-1])) { + --Len; + } + B[Len] = '\0'; + + /* Remove leading whitespace */ + while (IsSpace (*B)) { + ++B; + } + + /* Check for empty and comment lines */ + if (*B == '\0' || *B == ';' || *B == '#') { + continue; + } + + /* Parse the line */ + if (sscanf (B, "%31s %lu %*u %lu %*u", Name, &TotalRuns, &TotalChanges) != 3) { + /* Syntax error */ + continue; + } + + /* Search for the optimizer step. */ + Func = FindOptFunc (Name); + if (Func == 0) { + /* Not found */ + continue; + } + + /* Found the step, set the fields */ + Func->TotalRuns = TotalRuns; + Func->TotalChanges = TotalChanges; } @@ -2021,26 +1115,26 @@ static void WriteOptStats (const char* Name) /* Try to open the file */ FILE* F = fopen (Name, "w"); if (F == 0) { - /* Ignore the error */ - return; + /* Ignore the error */ + return; } /* Write a header */ fprintf (F, - "; Optimizer Total Last Total Last\n" - "; Step Runs Runs Chg Chg\n"); + "; Optimizer Total Last Total Last\n" + "; Step Runs Runs Chg Chg\n"); /* Write the data */ for (I = 0; I < OPTFUNC_COUNT; ++I) { - const OptFunc* O = OptFuncs[I]; - fprintf (F, - "%-20s %10lu %10lu %10lu %10lu\n", - O->Name, - O->TotalRuns, - O->LastRuns, - O->TotalChanges, - O->LastChanges); + const OptFunc* O = OptFuncs[I]; + fprintf (F, + "%-20s %10lu %10lu %10lu %10lu\n", + O->Name, + O->TotalRuns, + O->LastRuns, + O->TotalChanges, + O->LastChanges); } /* Close the file, ignore errors here. */ @@ -2049,31 +1143,82 @@ static void WriteOptStats (const char* Name) +static void OpenDebugFile (const CodeSeg* S) +/* Open the debug file for the given segment if the flag is on */ +{ + if (DebugOptOutput) { + StrBuf Name = AUTO_STRBUF_INITIALIZER; + if (S->Func) { + SB_CopyStr (&Name, S->Func->Name); + } else { + SB_CopyStr (&Name, "global"); + } + SB_AppendStr (&Name, ".opt"); + SB_Terminate (&Name); + OpenDebugOutputFile (SB_GetConstBuf (&Name)); + SB_Done (&Name); + } +} + + + +static void WriteDebugOutput (CodeSeg* S, const char* Step) +/* Write a separator line into the debug file if the flag is on */ +{ + if (DebugOptOutput) { + /* Output a separator */ + WriteOutput ("=========================================================================\n"); + + /* Output a header line */ + if (Step == 0) { + /* Initial output */ + WriteOutput ("Initial code for function '%s':\n", + S->Func? S->Func->Name : ""); + } else { + WriteOutput ("Code after applying '%s':\n", Step); + } + + /* Output the code segment */ + CS_Output (S); + } +} + + + static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max) /* Run one optimizer function Max times or until there are no more changes */ { unsigned Changes, C; /* Don't run the function if it is disabled or if it is prohibited by the - * code size factor - */ + ** code size factor + */ if (F->Disabled || F->CodeSizeFactor > S->CodeSizeFactor) { - return 0; + return 0; } /* Run this until there are no more changes */ Changes = 0; do { - /* Run the function */ - C = F->Func (S); - Changes += C; + /* Run the function */ + C = F->Func (S); + Changes += C; + + /* Do statistics */ + ++F->TotalRuns; + ++F->LastRuns; + F->TotalChanges += C; + F->LastChanges += C; - /* Do statistics */ - ++F->TotalRuns; - ++F->LastRuns; - F->TotalChanges += C; - F->LastChanges += C; + /* If we had changes, output stuff and regenerate register info */ + if (C) { + if (Debug) { + printf ("Applied %s: %u changes\n", F->Name, C); + } + WriteDebugOutput (S, F->Name); + CS_GenRegInfo (S); + } } while (--Max && C > 0); @@ -2085,15 +1230,19 @@ static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max) static unsigned RunOptGroup1 (CodeSeg* S) /* Run the first group of optimization steps. These steps translate known - * patterns emitted by the code generator into more optimal patterns. Order - * of the steps is important, because some of the steps done earlier cover - * the same patterns as later steps as subpatterns. - */ +** patterns emitted by the code generator into more optimal patterns. Order +** of the steps is important, because some of the steps done earlier cover +** the same patterns as later steps as subpatterns. +*/ { unsigned Changes = 0; + Changes += RunOptFunc (S, &DOptGotoSPAdj, 1); + Changes += RunOptFunc (S, &DOptStackPtrOps, 5); Changes += RunOptFunc (S, &DOptPtrStore1, 1); Changes += RunOptFunc (S, &DOptPtrStore2, 1); + Changes += RunOptFunc (S, &DOptPtrStore3, 1); + Changes += RunOptFunc (S, &DOptAdd3, 1); /* Before OptPtrLoad5! */ Changes += RunOptFunc (S, &DOptPtrLoad1, 1); Changes += RunOptFunc (S, &DOptPtrLoad2, 1); Changes += RunOptFunc (S, &DOptPtrLoad3, 1); @@ -2101,17 +1250,32 @@ static unsigned RunOptGroup1 (CodeSeg* S) Changes += RunOptFunc (S, &DOptPtrLoad5, 1); Changes += RunOptFunc (S, &DOptPtrLoad6, 1); Changes += RunOptFunc (S, &DOptPtrLoad7, 1); - Changes += RunOptFunc (S, &DOptNegAX1, 1); - Changes += RunOptFunc (S, &DOptNegAX2, 1); - Changes += RunOptFunc (S, &DOptNegAX3, 1); - Changes += RunOptFunc (S, &DOptNegAX4, 1); + Changes += RunOptFunc (S, &DOptPtrLoad18, 1); /* Before OptPtrLoad11 */ + Changes += RunOptFunc (S, &DOptPtrLoad11, 1); + Changes += RunOptFunc (S, &DOptPtrLoad12, 1); + Changes += RunOptFunc (S, &DOptPtrLoad13, 1); + Changes += RunOptFunc (S, &DOptPtrLoad14, 1); + Changes += RunOptFunc (S, &DOptPtrLoad15, 1); + Changes += RunOptFunc (S, &DOptPtrLoad16, 1); + Changes += RunOptFunc (S, &DOptPtrLoad17, 1); + Changes += RunOptFunc (S, &DOptPtrLoad19, 1); + Changes += RunOptFunc (S, &DOptBNegAX1, 1); + Changes += RunOptFunc (S, &DOptBNegAX2, 1); + Changes += RunOptFunc (S, &DOptBNegAX3, 1); + Changes += RunOptFunc (S, &DOptBNegAX4, 1); Changes += RunOptFunc (S, &DOptAdd1, 1); Changes += RunOptFunc (S, &DOptAdd2, 1); - Changes += RunOptFunc (S, &DOptAdd3, 1); + Changes += RunOptFunc (S, &DOptAdd4, 1); + Changes += RunOptFunc (S, &DOptAdd5, 1); + Changes += RunOptFunc (S, &DOptAdd6, 1); + Changes += RunOptFunc (S, &DOptSub1, 1); + Changes += RunOptFunc (S, &DOptSub3, 1); Changes += RunOptFunc (S, &DOptStore4, 1); + Changes += RunOptFunc (S, &DOptStore5, 1); Changes += RunOptFunc (S, &DOptShift1, 1); Changes += RunOptFunc (S, &DOptShift2, 1); - Changes += RunOptFunc (S, &DOptShift3, 1); + Changes += RunOptFunc (S, &DOptShift5, 1); + Changes += RunOptFunc (S, &DOptShift6, 1); Changes += RunOptFunc (S, &DOptStore1, 1); Changes += RunOptFunc (S, &DOptStore2, 5); Changes += RunOptFunc (S, &DOptStore3, 5); @@ -2124,10 +1288,10 @@ static unsigned RunOptGroup1 (CodeSeg* S) static unsigned RunOptGroup2 (CodeSeg* S) /* Run one group of optimization steps. This step involves just decoupling - * instructions by replacing them by instructions that do not depend on - * previous instructions. This makes it easier to find instructions that - * aren't used. - */ +** instructions by replacing them by instructions that do not depend on +** previous instructions. This makes it easier to find instructions that +** aren't used. +*/ { unsigned Changes = 0; @@ -2141,51 +1305,64 @@ static unsigned RunOptGroup2 (CodeSeg* S) static unsigned RunOptGroup3 (CodeSeg* S) /* Run one group of optimization steps. These steps depend on each other, - * that means that one step may allow another step to do additional work, - * so we will repeat the steps as long as we see any changes. - */ +** that means that one step may allow another step to do additional work, +** so we will repeat the steps as long as we see any changes. +*/ { unsigned Changes, C; Changes = 0; do { - C = 0; - - C += RunOptFunc (S, &DOptPtrLoad8, 1); - C += RunOptFunc (S, &DOptNegA1, 1); - C += RunOptFunc (S, &DOptNegA2, 1); - C += RunOptFunc (S, &DOptSub1, 1); - C += RunOptFunc (S, &DOptSub2, 1); - C += RunOptFunc (S, &DOptAdd4, 1); - C += RunOptFunc (S, &DOptAdd5, 1); - C += RunOptFunc (S, &DOptStackOps, 1); - C += RunOptFunc (S, &DOptJumpCascades, 1); - C += RunOptFunc (S, &DOptDeadJumps, 1); - C += RunOptFunc (S, &DOptRTS, 1); - C += RunOptFunc (S, &DOptDeadCode, 1); - C += RunOptFunc (S, &DOptJumpTarget, 1); - C += RunOptFunc (S, &DOptCondBranches, 1); - C += RunOptFunc (S, &DOptRTSJumps1, 1); - C += RunOptFunc (S, &DOptBoolTrans, 1); - C += RunOptFunc (S, &DOptCmp1, 1); - C += RunOptFunc (S, &DOptCmp2, 1); - C += RunOptFunc (S, &DOptCmp3, 1); - C += RunOptFunc (S, &DOptCmp4, 1); - C += RunOptFunc (S, &DOptCmp5, 1); - C += RunOptFunc (S, &DOptCmp6, 1); - C += RunOptFunc (S, &DOptCmp7, 1); - C += RunOptFunc (S, &DOptCmp8, 1); - C += RunOptFunc (S, &DOptTest1, 1); + C = 0; + + C += RunOptFunc (S, &DOptBNegA1, 1); + C += RunOptFunc (S, &DOptBNegA2, 1); + C += RunOptFunc (S, &DOptNegAX1, 1); + C += RunOptFunc (S, &DOptNegAX2, 1); + C += RunOptFunc (S, &DOptStackOps, 3); + C += RunOptFunc (S, &DOptShift1, 1); + C += RunOptFunc (S, &DOptShift4, 1); + C += RunOptFunc (S, &DOptComplAX1, 1); + C += RunOptFunc (S, &DOptSub1, 1); + C += RunOptFunc (S, &DOptSub2, 1); + C += RunOptFunc (S, &DOptSub3, 1); + C += RunOptFunc (S, &DOptAdd5, 1); + C += RunOptFunc (S, &DOptAdd6, 1); + C += RunOptFunc (S, &DOptJumpCascades, 1); + C += RunOptFunc (S, &DOptDeadJumps, 1); + C += RunOptFunc (S, &DOptRTS, 1); + C += RunOptFunc (S, &DOptDeadCode, 1); + C += RunOptFunc (S, &DOptBoolTrans, 1); + C += RunOptFunc (S, &DOptJumpTarget1, 1); + C += RunOptFunc (S, &DOptJumpTarget2, 1); + C += RunOptFunc (S, &DOptCondBranches1, 1); + C += RunOptFunc (S, &DOptCondBranches2, 1); + C += RunOptFunc (S, &DOptRTSJumps1, 1); + C += RunOptFunc (S, &DOptCmp1, 1); + C += RunOptFunc (S, &DOptCmp2, 1); + C += RunOptFunc (S, &DOptCmp8, 1); /* Must run before OptCmp3 */ + C += RunOptFunc (S, &DOptCmp3, 1); + C += RunOptFunc (S, &DOptCmp4, 1); + C += RunOptFunc (S, &DOptCmp5, 1); + C += RunOptFunc (S, &DOptCmp6, 1); + C += RunOptFunc (S, &DOptCmp7, 1); + C += RunOptFunc (S, &DOptCmp9, 1); + C += RunOptFunc (S, &DOptTest1, 1); C += RunOptFunc (S, &DOptLoad1, 1); - C += RunOptFunc (S, &DOptUnusedLoads, 1); - C += RunOptFunc (S, &DOptUnusedStores, 1); - C += RunOptFunc (S, &DOptDupLoads, 1); - C += RunOptFunc (S, &DOptStoreLoad, 1); - C += RunOptFunc (S, &DOptTransfers1, 1); + C += RunOptFunc (S, &DOptJumpTarget3, 1); /* After OptCondBranches2 */ + C += RunOptFunc (S, &DOptUnusedLoads, 1); + C += RunOptFunc (S, &DOptUnusedStores, 1); + C += RunOptFunc (S, &DOptDupLoads, 1); + C += RunOptFunc (S, &DOptStoreLoad, 1); + C += RunOptFunc (S, &DOptTransfers1, 1); + C += RunOptFunc (S, &DOptTransfers3, 1); + C += RunOptFunc (S, &DOptTransfers4, 1); + C += RunOptFunc (S, &DOptStore1, 1); + C += RunOptFunc (S, &DOptStore5, 1); C += RunOptFunc (S, &DOptPushPop, 1); C += RunOptFunc (S, &DOptPrecalc, 1); - Changes += C; + Changes += C; } while (C); @@ -2196,21 +1373,45 @@ static unsigned RunOptGroup3 (CodeSeg* S) static unsigned RunOptGroup4 (CodeSeg* S) +/* Run another round of pattern replacements. These are done late, since there +** may be better replacements before. +*/ +{ + unsigned Changes = 0; + + /* Repeat some of the steps here */ + Changes += RunOptFunc (S, &DOptShift3, 1); + Changes += RunOptFunc (S, &DOptPush1, 1); + Changes += RunOptFunc (S, &DOptPush2, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptTest2, 1); + Changes += RunOptFunc (S, &DOptTransfers2, 1); + Changes += RunOptFunc (S, &DOptLoad2, 1); + Changes += RunOptFunc (S, &DOptLoad3, 1); + Changes += RunOptFunc (S, &DOptDupLoads, 1); + + /* Return the number of changes */ + return Changes; +} + + + +static unsigned RunOptGroup5 (CodeSeg* S) /* 65C02 specific optimizations. */ { unsigned Changes = 0; if (CPUIsets[CPU] & CPU_ISET_65SC02) { Changes += RunOptFunc (S, &DOpt65C02BitOps, 1); - Changes += RunOptFunc (S, &DOpt65C02Ind, 1); + Changes += RunOptFunc (S, &DOpt65C02Ind, 1); Changes += RunOptFunc (S, &DOpt65C02Stores, 1); - if (Changes) { + if (Changes) { /* The 65C02 replacement codes do often make the use of a register - * value unnecessary, so if we have changes, run another load - * removal pass. - */ - Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - } + ** value unnecessary, so if we have changes, run another load + ** removal pass. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + } } /* Return the number of changes */ @@ -2219,17 +1420,25 @@ static unsigned RunOptGroup4 (CodeSeg* S) -static unsigned RunOptGroup5 (CodeSeg* S) -/* Run another round of pattern replacements. These are done late, since there - * may be better replacements before. - */ +static unsigned RunOptGroup6 (CodeSeg* S) +/* This one is quite special. It tries to replace "lda (sp),y" by "lda (sp,x)". +** The latter is ony cycle slower, but if we're able to remove the necessary +** load of the Y register, because X is zero anyway, we gain 1 cycle and +** shorten the code by one (transfer) or two bytes (load). So what we do is +** to replace the insns, remove unused loads, and then change back all insns +** where Y is still zero (meaning that the load has not been removed). +*/ { unsigned Changes = 0; - Changes += RunOptFunc (S, &DOptPush1, 1); - Changes += RunOptFunc (S, &DOptPush2, 1); - Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptTransfers2, 1); + /* This group will only run for a standard 6502, because the 65C02 has a + ** better addressing mode that covers this case. + */ + if ((CPUIsets[CPU] & CPU_ISET_65SC02) == 0) { + Changes += RunOptFunc (S, &DOptIndLoads1, 1); + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptIndLoads2, 1); + } /* Return the number of changes */ return Changes; @@ -2237,48 +1446,52 @@ static unsigned RunOptGroup5 (CodeSeg* S) -static unsigned RunOptGroup6 (CodeSeg* S) +static unsigned RunOptGroup7 (CodeSeg* S) /* The last group of optimization steps. Adjust branches, do size optimizations. - */ +*/ { unsigned Changes = 0; unsigned C; - if (S->CodeSizeFactor <= 100) { - /* Optimize for size, that is replace operations by shorter ones, even - * if this does hinder further optimizations (no problem since we're - * done soon). - */ - C = RunOptFunc (S, &DOptSize1, 1); - if (C) { - Changes += C; - /* Run some optimization passes again, since the size optimizations - * may have opened new oportunities. - */ - Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptJumpTarget, 5); - } + /* Optimize for size, that is replace operations by shorter ones, even + ** if this does hinder further optimizations (no problem since we're + ** done soon). + */ + C = RunOptFunc (S, &DOptSize1, 1); + if (C) { + Changes += C; + /* Run some optimization passes again, since the size optimizations + ** may have opened new oportunities. + */ + Changes += RunOptFunc (S, &DOptUnusedLoads, 1); + Changes += RunOptFunc (S, &DOptUnusedStores, 1); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); + Changes += RunOptFunc (S, &DOptStore5, 1); } + C = RunOptFunc (S, &DOptSize2, 1); if (C) { Changes += C; /* Run some optimization passes again, since the size optimizations - * may have opened new oportunities. - */ + ** may have opened new oportunities. + */ Changes += RunOptFunc (S, &DOptUnusedLoads, 1); - Changes += RunOptFunc (S, &DOptJumpTarget, 5); + Changes += RunOptFunc (S, &DOptJumpTarget1, 5); + Changes += RunOptFunc (S, &DOptStore5, 1); + Changes += RunOptFunc (S, &DOptTransfers1, 1); + Changes += RunOptFunc (S, &DOptTransfers3, 1); } /* Adjust branch distances */ Changes += RunOptFunc (S, &DOptBranchDist, 3); /* Replace conditional branches to RTS. If we had changes, we must run dead - * code elimination again, since the change may have introduced dead code. - */ + ** code elimination again, since the change may have introduced dead code. + */ C = RunOptFunc (S, &DOptRTSJumps2, 1); Changes += C; if (C) { - Changes += RunOptFunc (S, &DOptDeadCode, 1); + Changes += RunOptFunc (S, &DOptDeadCode, 1); } /* Return the number of changes */ @@ -2294,22 +1507,29 @@ void RunOpt (CodeSeg* S) /* If we shouldn't run the optimizer, bail out */ if (!S->Optimize) { - return; + return; } /* Check if we are requested to write optimizer statistics */ StatFileName = getenv ("CC65_OPTSTATS"); if (StatFileName) { - ReadOptStats (StatFileName); + ReadOptStats (StatFileName); } /* Print the name of the function we are working on */ if (S->Func) { - Print (stdout, 1, "Running optimizer for function `%s'\n", S->Func->Name); + Print (stdout, 1, "Running optimizer for function '%s'\n", S->Func->Name); } else { - Print (stdout, 1, "Running optimizer for global code segment\n"); + Print (stdout, 1, "Running optimizer for global code segment\n"); } + /* If requested, open an output file */ + OpenDebugFile (S); + WriteDebugOutput (S, 0); + + /* Generate register info for all instructions */ + CS_GenRegInfo (S); + /* Run groups of optimizations */ RunOptGroup1 (S); RunOptGroup2 (S); @@ -2317,12 +1537,18 @@ void RunOpt (CodeSeg* S) RunOptGroup4 (S); RunOptGroup5 (S); RunOptGroup6 (S); + RunOptGroup7 (S); + + /* Free register info */ + CS_FreeRegInfo (S); + + /* Close output file if necessary */ + if (DebugOptOutput) { + CloseOutputFile (); + } /* Write statistics */ if (StatFileName) { - WriteOptStats (StatFileName); + WriteOptStats (StatFileName); } } - - -