X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fcoptstop.c;h=87514cfdd3ebb06049f684da5e1732200350c1e7;hb=37f80534c9953e168f2bc512b7b6484aeec50e18;hp=47825f059da2a967073367efaf560b6d4ccd367e;hpb=21af43a80065a0fec151178c8547e2e2a8e4eb73;p=cc65 diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 47825f059..87514cfdd 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -1,15 +1,15 @@ /*****************************************************************************/ /* */ -/* coptstop.c */ +/* coptstop.c */ /* */ -/* Optimize operations that take operands via the stack */ +/* Optimize operations that take operands via the stack */ /* */ /* */ /* */ -/* (C) 2001-2004 Ullrich von Bassewitz */ -/* Römerstrasse 52 */ -/* D-70794 Filderstadt */ -/* EMail: uz@cc65.org */ +/* (C) 2001-2019, Ullrich von Bassewitz */ +/* Roemerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ /* */ /* */ /* This software is provided 'as-is', without any expressed or implied */ @@ -42,6 +42,44 @@ #include "codeent.h" #include "codeinfo.h" #include "coptstop.h" +#include "error.h" + + + +/*****************************************************************************/ +/* Load tracking data */ +/*****************************************************************************/ + + + +/* LoadRegInfo flags set by DirectOp */ +typedef enum { + LI_NONE = 0x00, + LI_DIRECT = 0x01, /* Direct op may be used */ + LI_RELOAD_Y = 0x02, /* Reload index register Y */ + LI_REMOVE = 0x04, /* Load may be removed */ + LI_DONT_REMOVE = 0x08, /* Load may not be removed */ + LI_DUP_LOAD = 0x10, /* Duplicate load */ +} LI_FLAGS; + +/* Structure that tells us how to load the lhs values */ +typedef struct LoadRegInfo LoadRegInfo; +struct LoadRegInfo { + LI_FLAGS Flags; /* Tells us how to load */ + int LoadIndex; /* Index of load insn, -1 if invalid */ + CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */ + int XferIndex; /* Index of transfer insn */ + CodeEntry* XferEntry; /* The actual transfer entry */ + int Offs; /* Stack offset if data is on stack */ +}; + +/* Now combined for both registers */ +typedef struct LoadInfo LoadInfo; +struct LoadInfo { + LoadRegInfo A; /* Info for A register */ + LoadRegInfo X; /* Info for X register */ + LoadRegInfo Y; /* Info for Y register */ +}; @@ -51,135 +89,305 @@ -/* Structure that holds the needed data */ +/* Flags for the functions */ +typedef enum { + OP_NONE = 0x00, /* Nothing special */ + OP_A_KNOWN = 0x01, /* Value of A must be known */ + OP_X_ZERO = 0x02, /* X must be zero */ + OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */ + OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */ + OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */ + OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */ +} OP_FLAGS; + +/* Structure forward decl */ typedef struct StackOpData StackOpData; -struct StackOpData { - CodeSeg* Code; /* Pointer to code segment */ - unsigned Flags; /* Flags to remember things */ - unsigned PushIndex; /* Index of call to pushax in codeseg */ - unsigned OpIndex; /* Index of actual operation */ - CodeEntry* PrevEntry; /* Entry before the call to pushax */ - CodeEntry* PushEntry; /* Pointer to entry with call to pushax */ - CodeEntry* OpEntry; /* Pointer to entry with op */ - CodeEntry* NextEntry; /* Entry after the op */ - const char* ZPLo; /* Lo byte of zero page loc to use */ - const char* ZPHi; /* Hi byte of zero page loc to use */ - unsigned IP; /* Insertion point used by some routines */ + +/* Structure that describes an optimizer subfunction for a specific op */ +typedef unsigned (*OptFunc) (StackOpData* D); +typedef struct OptFuncDesc OptFuncDesc; +struct OptFuncDesc { + const char* Name; /* Name of the replaced runtime function */ + OptFunc Func; /* Function pointer */ + unsigned UnusedRegs; /* Regs that must not be used later */ + OP_FLAGS Flags; /* Flags */ }; -/* Flags returned by DirectOp */ -#define OP_DIRECT 0x01 /* Direct op may be used */ -#define OP_RELOAD_Y 0x02 /* Must reload index register Y */ +/* Structure that holds the needed data */ +struct StackOpData { + CodeSeg* Code; /* Pointer to code segment */ + unsigned Flags; /* Flags to remember things */ + + /* Pointer to optimizer subfunction description */ + const OptFuncDesc* OptFunc; + + /* ZP register usage inside the sequence */ + unsigned ZPUsage; + + /* Register load information for lhs and rhs */ + LoadInfo Lhs; + LoadInfo Rhs; + + /* Several indices of insns in the code segment */ + int PushIndex; /* Index of call to pushax in codeseg */ + int OpIndex; /* Index of actual operation */ + + /* Pointers to insns in the code segment */ + CodeEntry* PrevEntry; /* Entry before the call to pushax */ + CodeEntry* PushEntry; /* Pointer to entry with call to pushax */ + CodeEntry* OpEntry; /* Pointer to entry with op */ + CodeEntry* NextEntry; /* Entry after the op */ + + const char* ZPLo; /* Lo byte of zero page loc to use */ + const char* ZPHi; /* Hi byte of zero page loc to use */ + unsigned IP; /* Insertion point used by some routines */ +}; /*****************************************************************************/ -/* Helpers */ +/* Load tracking code */ /*****************************************************************************/ -static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop, - unsigned Offs) -/* Adjust the offset for all stack accesses in the range Start to Stop, both - * inclusive. The function returns the number of instructions that have been - * inserted. - */ +static void ClearLoadRegInfo (LoadRegInfo* RI) +/* Clear a LoadRegInfo struct */ { - /* Number of inserted instructions */ - unsigned Inserted = 0; + RI->Flags = LI_NONE; + RI->LoadIndex = -1; + RI->XferIndex = -1; + RI->Offs = 0; +} + - /* Walk over all entries */ - unsigned I = Start; - while (I <= Stop) { - CodeEntry* E = CS_GetEntry (S, I); +static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S) +/* Prepare a LoadRegInfo struct for use */ +{ + /* Get the entries */ + if (RI->LoadIndex >= 0) { + RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex); + } else { + RI->LoadEntry = 0; + } + if (RI->XferIndex >= 0) { + RI->XferEntry = CS_GetEntry (S, RI->XferIndex); + } else { + RI->XferEntry = 0; + } +} - int NeedCorrection = 0; - if ((E->Use & REG_SP) != 0) { - /* Check for some things that should not happen */ - CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs); - CHECK (strcmp (E->Arg, "sp") == 0); - /* We need to correct this one */ - NeedCorrection = 1; +static void ClearLoadInfo (LoadInfo* LI) +/* Clear a LoadInfo struct */ +{ + ClearLoadRegInfo (&LI->A); + ClearLoadRegInfo (&LI->X); + ClearLoadRegInfo (&LI->Y); +} - } else if (CE_IsCallTo (E, "ldaxysp")) { - /* We need to correct this one */ - NeedCorrection = 1; +static void AdjustLoadRegInfo (LoadRegInfo* RI, int Index, int Change) +/* Adjust a load register info struct after deleting or inserting an entry +** with a given index +*/ +{ + CHECK (abs (Change) == 1); + if (Change < 0) { + /* Deletion */ + if (Index < RI->LoadIndex) { + --RI->LoadIndex; + } else if (Index == RI->LoadIndex) { + /* Has been removed */ + RI->LoadIndex = -1; + RI->LoadEntry = 0; + } + if (Index < RI->XferIndex) { + --RI->XferIndex; + } else if (Index == RI->XferIndex) { + /* Has been removed */ + RI->XferIndex = -1; + RI->XferEntry = 0; + } + } else { + /* Insertion */ + if (Index <= RI->LoadIndex) { + ++RI->LoadIndex; + } + if (Index <= RI->XferIndex) { + ++RI->XferIndex; } + } +} - if (NeedCorrection) { - CodeEntry* P; - /* If the Y register value is needed later, we have to reload the - * register after changing it. - */ - int NeedY = RegYUsed (S, I+1); - unsigned YVal = E->RI->In.RegY; +static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S) +/* Prepare a LoadInfo struct for use */ +{ + /* Get the entries */ + FinalizeLoadRegInfo (&LI->A, S); + FinalizeLoadRegInfo (&LI->X, S); + FinalizeLoadRegInfo (&LI->Y, S); +} + - /* Get the code entry before this one. If it's a LDY, adjust the - * value. - */ - P = CS_GetPrevEntry (S, I); - if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) { - /* The Y load is just before the stack access, adjust it */ - CE_SetNumArg (P, P->Num - Offs); +static void AdjustLoadInfo (LoadInfo* LI, int Index, int Change) +/* Adjust a load info struct after deleting entry with a given index */ +{ + AdjustLoadRegInfo (&LI->A, Index, Change); + AdjustLoadRegInfo (&LI->X, Index, Change); + AdjustLoadRegInfo (&LI->Y, Index, Change); +} - } else { - /* Insert a new load instruction before the stack access */ - const char* Arg = MakeHexArg (YVal - Offs); - CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - CS_InsertEntry (S, X, I); - /* One more inserted entries */ - ++Inserted; - ++Stop; +static void HonourUseAndChg (LoadRegInfo* RI, unsigned Reg, const CodeEntry* E) +/* Honour use and change flags for an instruction */ +{ + if (E->Chg & Reg) { + ClearLoadRegInfo (RI); + } else if ((E->Use & Reg) && RI->LoadIndex >= 0) { + RI->Flags |= LI_DONT_REMOVE; + } +} - /* Be sure to skip the stack access for the next round */ - ++I; - } - /* If we need the value of Y later, be sure to reload it */ - if (NeedY) { - const char* Arg = MakeHexArg (YVal); - CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); - CS_InsertEntry (S, X, I+1); +static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I) +/* Track loads for a code entry */ +{ + if (E->Info & OF_LOAD) { - /* One more inserted entries */ - ++Inserted; - ++Stop; + LoadRegInfo* RI = 0; - /* Skip this instruction int the next round */ - ++I; - } - } + /* Determine, which register was loaded */ + if (E->Chg & REG_A) { + RI = &LI->A; + } else if (E->Chg & REG_X) { + RI = &LI->X; + } else if (E->Chg & REG_Y) { + RI = &LI->Y; + } + CHECK (RI != 0); + + /* If we had a load or xfer op before, this is a duplicate load which + ** can cause problems if it encountered between the pushax and the op, + ** so remember it. + */ + if (RI->LoadIndex >= 0 || RI->XferIndex >= 0) { + RI->Flags |= LI_DUP_LOAD; + } - /* Next entry */ - ++I; - } + /* Remember the load */ + RI->LoadIndex = I; + RI->XferIndex = -1; - /* Return the number of inserted entries */ - return Inserted; + /* Set load flags */ + RI->Flags &= ~(LI_DIRECT | LI_RELOAD_Y); + if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) { + /* These insns are all ok and replaceable */ + RI->Flags |= LI_DIRECT; + } else if (E->AM == AM65_ZP_INDY && + RegValIsKnown (E->RI->In.RegY) && + strcmp (E->Arg, "sp") == 0) { + /* A load from the stack with known offset is also ok, but in this + ** case we must reload the index register later. Please note that + ** a load indirect via other zero page locations is not ok, since + ** these locations may change between the push and the actual + ** operation. + */ + RI->Offs = (unsigned char) E->RI->In.RegY; + RI->Flags |= (LI_DIRECT | LI_RELOAD_Y); + } + + + } else if (E->Info & OF_XFR) { + + /* Determine source and target of the transfer and handle the TSX insn */ + LoadRegInfo* Src; + LoadRegInfo* Tgt; + switch (E->OPC) { + case OP65_TAX: Src = &LI->A; Tgt = &LI->X; break; + case OP65_TAY: Src = &LI->A; Tgt = &LI->Y; break; + case OP65_TXA: Src = &LI->X; Tgt = &LI->A; break; + case OP65_TYA: Src = &LI->Y; Tgt = &LI->A; break; + case OP65_TSX: ClearLoadRegInfo (&LI->X); return; + case OP65_TXS: return; + default: Internal ("Unknown XFR insn in TrackLoads"); + } + + /* If we had a load or xfer op before, this is a duplicate load which + ** can cause problems if it encountered between the pushax and the op, + ** so remember it. + */ + if (Tgt->LoadIndex >= 0 || Tgt->XferIndex >= 0) { + Tgt->Flags |= LI_DUP_LOAD; + } + + /* Transfer the data */ + Tgt->LoadIndex = Src->LoadIndex; + Tgt->XferIndex = I; + Tgt->Offs = Src->Offs; + Tgt->Flags &= ~(LI_DIRECT | LI_RELOAD_Y); + Tgt->Flags |= Src->Flags & (LI_DIRECT | LI_RELOAD_Y); + + } else if (CE_IsCallTo (E, "ldaxysp") && RegValIsKnown (E->RI->In.RegY)) { + + /* If we had a load or xfer op before, this is a duplicate load which + ** can cause problems if it encountered between the pushax and the op, + ** so remember it for both registers involved. + */ + if (LI->A.LoadIndex >= 0 || LI->A.XferIndex >= 0) { + LI->A.Flags |= LI_DUP_LOAD; + } + if (LI->X.LoadIndex >= 0 || LI->X.XferIndex >= 0) { + LI->X.Flags |= LI_DUP_LOAD; + } + + /* Both registers set, Y changed */ + LI->A.LoadIndex = I; + LI->A.XferIndex = -1; + LI->A.Flags |= (LI_DIRECT | LI_RELOAD_Y); + LI->A.Offs = (unsigned char) E->RI->In.RegY - 1; + + LI->X.LoadIndex = I; + LI->X.XferIndex = -1; + LI->X.Flags |= (LI_DIRECT | LI_RELOAD_Y); + LI->X.Offs = (unsigned char) E->RI->In.RegY; + + ClearLoadRegInfo (&LI->Y); + } else { + HonourUseAndChg (&LI->A, REG_A, E); + HonourUseAndChg (&LI->X, REG_X, E); + HonourUseAndChg (&LI->Y, REG_Y, E); + } } -static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index) +/*****************************************************************************/ +/* Helpers */ +/*****************************************************************************/ + + + +static void InsertEntry (StackOpData* D, CodeEntry* E, int Index) /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will - * be adjusted by this function. - */ +** be adjusted by this function. +*/ { /* Insert the entry into the code segment */ CS_InsertEntry (D->Code, E, Index); + /* Adjust register loads if necessary */ + AdjustLoadInfo (&D->Lhs, Index, 1); + AdjustLoadInfo (&D->Rhs, Index, 1); + /* Adjust the indices if necessary */ if (D->PushEntry && Index <= D->PushIndex) { ++D->PushIndex; @@ -191,15 +399,19 @@ static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index) -static void DelEntry (StackOpData* D, unsigned Index) +static void DelEntry (StackOpData* D, int Index) /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be - * adjusted by this function, and PushEntry/OpEntry may get invalidated. - */ +** adjusted by this function, and PushEntry/OpEntry may get invalidated. +*/ { /* Delete the entry from the code segment */ CS_DelEntry (D->Code, Index); - /* Adjust the indices if necessary */ + /* Adjust register loads if necessary */ + AdjustLoadInfo (&D->Lhs, Index, -1); + AdjustLoadInfo (&D->Rhs, Index, -1); + + /* Adjust the other indices if necessary */ if (Index < D->PushIndex) { --D->PushIndex; } else if (Index == D->PushIndex) { @@ -214,109 +426,288 @@ static void DelEntry (StackOpData* D, unsigned Index) -static void CheckDirectOp (StackOpData* D) -/* Check if the given entry is a lda instruction with an addressing mode - * that allows us to replace it by another operation (like ora). If so, we may - * use this location for the or and must not save the value in the zero - * page location. - */ +static void AdjustStackOffset (StackOpData* D, unsigned Offs) +/* Adjust the offset for all stack accesses in the range PushIndex to OpIndex. +** OpIndex is adjusted according to the insertions. +*/ { - /* We need the entry before the push */ - CodeEntry* E; - CHECK ((E = D->PrevEntry) != 0); + /* Walk over all entries */ + int I = D->PushIndex + 1; + while (I < D->OpIndex) { - if (E->OPC == OP65_LDA) { - if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) { - /* These insns are all ok and replaceable */ - D->Flags |= OP_DIRECT; - } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY) && - strcmp (E->Arg, "sp") == 0) { - /* A load from the stack with known offset is also ok, but in this - * case we must reload the index register later. Please note that - * a load indirect via other zero page locations is not ok, since - * these locations may change between the push and the actual - * operation. - */ - D->Flags |= (OP_DIRECT | OP_RELOAD_Y); + CodeEntry* E = CS_GetEntry (D->Code, I); + + /* Check if this entry does a stack access, and if so, if it's a plain + ** load from stack, since this is needed later. + */ + int Correction = 0; + if ((E->Use & REG_SP) != 0) { + + /* Check for some things that should not happen */ + CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs); + CHECK (strcmp (E->Arg, "sp") == 0); + /* We need to correct this one */ + Correction = (E->OPC == OP65_LDA)? 2 : 1; + + } else if (CE_IsCallTo (E, "ldaxysp")) { + /* We need to correct this one */ + Correction = 1; + } + + if (Correction) { + /* Get the code entry before this one. If it's a LDY, adjust the + ** value. + */ + CodeEntry* P = CS_GetPrevEntry (D->Code, I); + if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) { + /* The Y load is just before the stack access, adjust it */ + CE_SetNumArg (P, P->Num - Offs); + } else { + /* Insert a new load instruction before the stack access */ + const char* Arg = MakeHexArg (E->RI->In.RegY - Offs); + CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + InsertEntry (D, X, I++); + } + + /* If we need the value of Y later, be sure to reload it */ + if (RegYUsed (D->Code, I+1)) { + CodeEntry* N; + const char* Arg = MakeHexArg (E->RI->In.RegY); + if (Correction == 2 && (N = CS_GetNextEntry(D->Code, I)) != 0 && + ((N->Info & OF_ZBRA) != 0) && N->JumpTo != 0) { + /* The Y register is used but the load instruction loads A + ** and is followed by a branch that evaluates the zero flag. + ** This means that we cannot just insert the load insn + ** for the Y register at this place, because it would + ** destroy the Z flag. Instead place load insns at the + ** target of the branch and after it. + ** Note: There is a chance that this code won't work. The + ** jump may be a backwards jump (in which case the stack + ** offset has already been adjusted) or there may be other + ** instructions between the load and the conditional jump. + ** Currently the compiler does not generate such code, but + ** it is possible to force the optimizer into something + ** invalid by use of inline assembler. + */ + + /* Add load insn after the branch */ + CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + InsertEntry (D, X, I+2); + + /* Add load insn before branch target */ + CodeEntry* Y = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + int J = CS_GetEntryIndex (D->Code, N->JumpTo->Owner); + CHECK (J > I); /* Must not happen */ + InsertEntry (D, Y, J); + + /* Move the label to the new insn */ + CodeLabel* L = CS_GenLabel (D->Code, Y); + CS_MoveLabelRef (D->Code, N, L); + } else { + CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); + InsertEntry (D, X, I+1); + /* Skip this instruction in the next round */ + ++I; + } + } } + + /* Next entry */ + ++I; + } + + /* If we have rhs load insns that load from stack, we'll have to adjust + ** the offsets for these also. + */ + if (D->Rhs.A.Flags & LI_RELOAD_Y) { + D->Rhs.A.Offs -= Offs; + } + if (D->Rhs.X.Flags & LI_RELOAD_Y) { + D->Rhs.X.Offs -= Offs; } } -static void ReplacePushByStore (StackOpData* D) -/* Replace the call to the push subroutine by a store into the zero page - * location (actually, the push is not replaced, because we need it for - * later, but the name is still ok since the push will get removed at the - * end of each routine). - */ +static void AddStoreA (StackOpData* D) +/* Add a store to zero page after the push insn */ { - CodeEntry* X; + CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); + InsertEntry (D, X, D->PushIndex+1); +} - /* Store the value into the zeropage instead of pushing it */ - X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI); + + +static void AddStoreX (StackOpData* D) +/* Add a store to zero page after the push insn */ +{ + CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex+1); - if ((D->Flags & OP_DIRECT) == 0) { - X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); - InsertEntry (D, X, D->PushIndex+1); +} + + + +static void ReplacePushByStore (StackOpData* D) +/* Replace the call to the push subroutine by a store into the zero page +** location (actually, the push is not replaced, because we need it for +** later, but the name is still ok since the push will get removed at the +** end of each routine). +*/ +{ + /* Store the value into the zeropage instead of pushing it. Check high + ** byte first so that the store is later in A/X order. + */ + if ((D->Lhs.X.Flags & LI_DIRECT) == 0) { + AddStoreX (D); + } + if ((D->Lhs.A.Flags & LI_DIRECT) == 0) { + AddStoreA (D); } } -static void AddOpLow (StackOpData* D, opc_t OPC) +static void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI) /* Add an op for the low byte of an operator. This function honours the - * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions. - * All code is inserted at the current insertion point. - */ +** OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions. +** All code is inserted at the current insertion point. +*/ { CodeEntry* X; - if ((D->Flags & OP_DIRECT) != 0) { - /* Op with a variable location. If the location is on the stack, we - * need to reload the Y register. - */ - if ((D->Flags & OP_RELOAD_Y) != 0) { - const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY); + if ((LI->A.Flags & LI_DIRECT) != 0) { + /* Op with a variable location. If the location is on the stack, we + ** need to reload the Y register. + */ + if ((LI->A.Flags & LI_RELOAD_Y) == 0) { + + /* opc ... */ + CodeEntry* LoadA = LI->A.LoadEntry; + X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } else { + + /* ldy #offs */ + const char* Arg = MakeHexArg (LI->A.Offs); X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); + + /* opc (sp),y */ + X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } - X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI); + + /* In both cases, we can remove the load */ + LI->A.Flags |= LI_REMOVE; + } else { - /* Op with temp storage */ - X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + + /* Op with temp storage */ + X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } - InsertEntry (D, X, D->IP++); } -static void AddOpHigh (StackOpData* D, opc_t OPC) +static void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult) /* Add an op for the high byte of an operator. Special cases (constant values - * or similar) have to be checked separately, the function covers only the - * generic case. Code is inserted at the insertion point. - */ +** or similar) have to be checked separately, the function covers only the +** generic case. Code is inserted at the insertion point. +*/ { CodeEntry* X; - /* High byte is unknown */ - X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); + if (KeepResult) { + /* pha */ + X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } + + /* txa */ X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); + + if ((LI->X.Flags & LI_DIRECT) != 0) { + + if ((LI->X.Flags & LI_RELOAD_Y) == 0) { + + /* opc xxx */ + CodeEntry* LoadX = LI->X.LoadEntry; + X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } else { + + /* ldy #const */ + const char* Arg = MakeHexArg (LI->X.Offs); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* opc (sp),y */ + X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } + + /* In both cases, we can remove the load */ + LI->X.Flags |= LI_REMOVE; + + } else { + /* opc zphi */ + X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } + + if (KeepResult) { + /* tax */ + X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* pla */ + X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } +} + + + +static void RemoveRegLoads (StackOpData* D, LoadInfo* LI) +/* Remove register load insns */ +{ + /* Both registers may be loaded with one insn, but DelEntry will in this + ** case clear the other one. + */ + if ((LI->A.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) { + if (LI->A.LoadIndex >= 0) { + DelEntry (D, LI->A.LoadIndex); + } + if (LI->A.XferIndex >= 0) { + DelEntry (D, LI->A.XferIndex); + } + } + if ((LI->X.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) { + if (LI->X.LoadIndex >= 0) { + DelEntry (D, LI->X.LoadIndex); + } + if (LI->X.XferIndex >= 0) { + DelEntry (D, LI->X.XferIndex); + } + } } -static void RemovePushAndOp (StackOpData* D) -/* Remove the call to pushax and the call to the operator subroutine */ +static void RemoveRemainders (StackOpData* D) +/* Remove the code that is unnecessary after translation of the sequence */ { + /* Remove the register loads for lhs and rhs */ + RemoveRegLoads (D, &D->Lhs); + RemoveRegLoads (D, &D->Rhs); + + /* Remove the push and the operator routine */ DelEntry (D, D->OpIndex); DelEntry (D, D->PushIndex); } @@ -324,118 +715,722 @@ static void RemovePushAndOp (StackOpData* D) static int IsRegVar (StackOpData* D) -/* If the value pushed is that of a register variable, replace ZPLo and ZPHi - * in the given StackOpData struct by the register variables and return true. - * Otherwise leave D untouched and return false. - */ -{ - CodeEntry* P; - - if (D->PushIndex >= 2 && - (P = D->PrevEntry) != 0 && - P->OPC == OP65_LDX && - P->AM == AM65_ZP && - strncmp (P->Arg, "regbank+", 7) == 0 && - IsDigit (P->Arg[8]) && - (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 && - P->OPC == OP65_LDA && - P->AM == AM65_ZP && - strncmp (P->Arg, "regbank+", 7) == 0 && - IsDigit (P->Arg[8])) { - /* Ok, it loads the register variable */ - D->ZPHi = D->PrevEntry->Arg; - D->ZPLo = P->Arg; - return 1; - } else { +/* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi +** in the given StackOpData struct by the variable and return true. Otherwise +** leave D untouched and return false. +*/ +{ + CodeEntry* LoadA = D->Lhs.A.LoadEntry; + CodeEntry* LoadX = D->Lhs.X.LoadEntry; + unsigned Len; + + /* Must have both load insns */ + if (LoadA == 0 || LoadX == 0) { + return 0; + } + + /* Must be loads from zp */ + if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) { return 0; } + + /* Must be the same zp loc with high byte in X */ + Len = strlen (LoadA->Arg); + if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 || + strcmp (LoadX->Arg + Len, "+1") != 0) { + return 0; + } + + /* Use the zero page location directly */ + D->ZPLo = LoadA->Arg; + D->ZPHi = LoadX->Arg; + return 1; } /*****************************************************************************/ -/* Actual optimization functions */ +/* Actual optimization functions */ /*****************************************************************************/ -static unsigned Opt___bzero (StackOpData* D) -/* Optimize the __bzero sequence if possible */ +static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer) +/* Optimize the toseqax and tosneax sequences. */ { CodeEntry* X; - const char* Arg; - CodeLabel* L; + CodeLabel* L; - /* Check if we're using a register variable */ - if (!IsRegVar (D)) { - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); - } + /* Create a call to the boolean transformer function and a label for this + ** insn. This is needed for all variants. Other insns are inserted *before* + ** the call. + */ + X = NewCodeEntry (OP65_JSR, AM65_ABS, BoolTransformer, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex + 1); + L = CS_GenLabel (D->Code, X); - /* If the return value of __bzero is used, we have to add code to reload - * a/x from the pointer variable. - */ - if (RegAXUsed (D->Code, D->OpIndex+1)) { - X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+1); - X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+2); - } + /* If the lhs is direct (but not stack relative), encode compares with lhs + ** effectively reverting the order (which doesn't matter for ==). + */ + if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && + (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { - /* X is always zero, A contains the size of the data area to zero. - * Note: A may be zero, in which case the operation is null op. - */ - if (D->OpEntry->RI->In.RegA != 0) { + CodeEntry* LoadX = D->Lhs.X.LoadEntry; + CodeEntry* LoadA = D->Lhs.A.LoadEntry; - /* The value of A is known */ - if (D->OpEntry->RI->In.RegA <= 0x81) { + D->IP = D->OpIndex+1; - /* Loop using the sign bit */ - X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+1); + /* cpx */ + X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+2); + /* bne L */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+3); - L = CS_GenLabel (D->Code, X); + /* cmp */ + X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Lhs load entries can be removed */ + D->Lhs.X.Flags |= LI_REMOVE; + D->Lhs.A.Flags |= LI_REMOVE; + + } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && + (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + + CodeEntry* LoadX = D->Rhs.X.LoadEntry; + CodeEntry* LoadA = D->Rhs.A.LoadEntry; + + D->IP = D->OpIndex+1; + + /* cpx */ + X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* bne L */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* cmp */ + X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Rhs load entries can be removed */ + D->Rhs.X.Flags |= LI_REMOVE; + D->Rhs.A.Flags |= LI_REMOVE; + + } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 && + (D->Rhs.X.Flags & LI_DIRECT) != 0) { + + D->IP = D->OpIndex+1; + + /* Add operand for low byte */ + AddOpLow (D, OP65_CMP, &D->Rhs); + + /* bne L */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Add operand for high byte */ + AddOpHigh (D, OP65_CMP, &D->Rhs, 0); + + } else { + + /* Save lhs into zeropage, then compare */ + AddStoreX (D); + AddStoreA (D); + + D->IP = D->OpIndex+1; + + /* cpx */ + X = NewCodeEntry (OP65_CPX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* bne L */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* cmp */ + X = NewCodeEntry (OP65_CMP, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } + + /* Remove the push and the call to the tosgeax function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_tosshift (StackOpData* D, const char* Name) +/* Optimize shift sequences. */ +{ + CodeEntry* X; + + /* Store the value into the zeropage instead of pushing it */ + ReplacePushByStore (D); + + /* If the lhs is direct (but not stack relative), we can just reload the + ** data later. + */ + if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && + (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + + CodeEntry* LoadX = D->Lhs.X.LoadEntry; + CodeEntry* LoadA = D->Lhs.A.LoadEntry; + + /* Inline the shift */ + D->IP = D->OpIndex+1; + + /* tay */ + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* lda */ + X = NewCodeEntry (OP65_LDA, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* ldx */ + X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Lhs load entries can be removed */ + D->Lhs.X.Flags |= LI_REMOVE; + D->Lhs.A.Flags |= LI_REMOVE; + + } else { + + /* Save lhs into zeropage and reload later */ + AddStoreX (D); + AddStoreA (D); + + /* Be sure to setup IP after adding the stores, otherwise it will get + ** messed up. + */ + D->IP = D->OpIndex+1; + + /* tay */ + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* lda zp */ + X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* ldx zp+1 */ + X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } + + /* jsr shlaxy/aslaxy/whatever */ + X = NewCodeEntry (OP65_JSR, AM65_ABS, Name, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Remove the push and the call to the shift function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt___bzero (StackOpData* D) +/* Optimize the __bzero sequence */ +{ + CodeEntry* X; + const char* Arg; + CodeLabel* L; + + /* Check if we're using a register variable */ + if (!IsRegVar (D)) { + /* Store the value into the zeropage instead of pushing it */ + AddStoreX (D); + AddStoreA (D); + } + + /* If the return value of __bzero is used, we have to add code to reload + ** a/x from the pointer variable. + */ + if (RegAXUsed (D->Code, D->OpIndex+1)) { + X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+1); + X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+2); + } + + /* X is always zero, A contains the size of the data area to zero. + ** Note: A may be zero, in which case the operation is null op. + */ + if (D->OpEntry->RI->In.RegA != 0) { + + /* lda #$00 */ + X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+1); + + /* The value of A is known */ + if (D->OpEntry->RI->In.RegA <= 0x81) { + + /* Loop using the sign bit */ + + /* ldy #count-1 */ + Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+2); + + /* L: sta (zp),y */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+3); + L = CS_GenLabel (D->Code, X); + + /* dey */ + X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+4); + + /* bpl L */ + X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+5); + + } else { + + /* Loop using an explicit compare */ + + /* ldy #$00 */ + X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+2); + + /* L: sta (zp),y */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+3); + L = CS_GenLabel (D->Code, X); + + /* iny */ + X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+4); + + /* cpy #count */ + Arg = MakeHexArg (D->OpEntry->RI->In.RegA); + X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+5); + + /* bne L */ + X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+6); + } + + } + + /* Remove the push and the call to the __bzero function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_staspidx (StackOpData* D) +/* Optimize the staspidx sequence */ +{ + CodeEntry* X; + + /* Check if we're using a register variable */ + if (!IsRegVar (D)) { + /* Store the value into the zeropage instead of pushing it */ + AddStoreX (D); + AddStoreA (D); + } + + /* Replace the store subroutine call by a direct op */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+1); + + /* Remove the push and the call to the staspidx function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_staxspidx (StackOpData* D) +/* Optimize the staxspidx sequence */ +{ + CodeEntry* X; + + /* Check if we're using a register variable */ + if (!IsRegVar (D)) { + /* Store the value into the zeropage instead of pushing it */ + AddStoreX (D); + AddStoreA (D); + } + + /* Inline the store */ + + /* sta (zp),y */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+1); + + if (RegValIsKnown (D->OpEntry->RI->In.RegY)) { + /* Value of Y is known */ + const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1); + X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); + } else { + X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); + } + InsertEntry (D, X, D->OpIndex+2); + + if (RegValIsKnown (D->OpEntry->RI->In.RegX)) { + /* Value of X is known */ + const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX); + X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI); + } else { + /* Value unknown */ + X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI); + } + InsertEntry (D, X, D->OpIndex+3); + + /* sta (zp),y */ + X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+4); + + /* If we remove staxspidx, we must restore the Y register to what the + ** function would return. + */ + X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->OpIndex+5); + + /* Remove the push and the call to the staxspidx function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_tosaddax (StackOpData* D) +/* Optimize the tosaddax sequence */ +{ + CodeEntry* X; + CodeEntry* N; + + /* We need the entry behind the add */ + CHECK (D->NextEntry != 0); + + /* Check if the X register is known and zero when the add is done, and + ** if the add is followed by + ** + ** ldy #$00 + ** jsr ldauidx ; or ldaidx + ** + ** If this is true, the addition does actually add an offset to a pointer + ** before it is dereferenced. Since both subroutines take an offset in Y, + ** we can pass the offset (instead of #$00) and remove the addition + ** alltogether. + */ + if (D->OpEntry->RI->In.RegX == 0 && + D->NextEntry->OPC == OP65_LDY && + CE_IsKnownImm (D->NextEntry, 0) && + !CE_HasLabel (D->NextEntry) && + (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 && + (CE_IsCallTo (N, "ldauidx") || + CE_IsCallTo (N, "ldaidx"))) { + + int Signed = (strcmp (N->Arg, "ldaidx") == 0); + + /* Store the value into the zeropage instead of pushing it */ + AddStoreX (D); + AddStoreA (D); + + /* Replace the ldy by a tay. Be sure to create the new entry before + ** deleting the ldy, since we will reference the line info from this + ** insn. + */ + X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI); + DelEntry (D, D->OpIndex + 1); + InsertEntry (D, X, D->OpIndex + 1); + + /* Replace the call to ldaidx/ldauidx. Since X is already zero, and + ** the ptr is in the zero page location, we just need to load from + ** the pointer, and fix X in case of ldaidx. + */ + X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI); + DelEntry (D, D->OpIndex + 2); + InsertEntry (D, X, D->OpIndex + 2); + if (Signed) { + + CodeLabel* L; + + /* Add sign extension - N is unused now */ + N = CS_GetNextEntry (D->Code, D->OpIndex + 2); + CHECK (N != 0); + L = CS_GenLabel (D->Code, N); + + X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI); + InsertEntry (D, X, D->OpIndex + 3); + + X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI); + InsertEntry (D, X, D->OpIndex + 4); + } + + } else { + + /* Store the value into the zeropage instead of pushing it */ + ReplacePushByStore (D); + + /* Inline the add */ + D->IP = D->OpIndex+1; + + /* clc */ + X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Low byte */ + AddOpLow (D, OP65_ADC, &D->Lhs); + + /* High byte */ + if (D->PushEntry->RI->In.RegX == 0) { + + /* The high byte is the value in X plus the carry */ + CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry); + + /* bcc L */ + X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* inx */ + X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } else if (D->OpEntry->RI->In.RegX == 0 && + (RegValIsKnown (D->PushEntry->RI->In.RegX) || + (D->Lhs.X.Flags & LI_RELOAD_Y) == 0)) { + + /* The high byte is that of the first operand plus carry */ + CodeLabel* L; + if (RegValIsKnown (D->PushEntry->RI->In.RegX)) { + /* Value of first op high byte is known */ + const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); + } else { + /* Value of first op high byte is unknown. Load from ZP or + ** original storage. + */ + if (D->Lhs.X.Flags & LI_DIRECT) { + CodeEntry* LoadX = D->Lhs.X.LoadEntry; + X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); + } else { + X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); + } + } + InsertEntry (D, X, D->IP++); + + /* bcc label */ + L = CS_GenLabel (D->Code, D->NextEntry); + X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* inx */ + X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + } else { + /* High byte is unknown */ + AddOpHigh (D, OP65_ADC, &D->Lhs, 1); + } + } + + /* Remove the push and the call to the tosaddax function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_tosandax (StackOpData* D) +/* Optimize the tosandax sequence */ +{ + /* Store the value into the zeropage instead of pushing it */ + ReplacePushByStore (D); + + /* Inline the and, low byte */ + D->IP = D->OpIndex + 1; + AddOpLow (D, OP65_AND, &D->Lhs); + + /* High byte */ + AddOpHigh (D, OP65_AND, &D->Lhs, 1); + + /* Remove the push and the call to the tosandax function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_tosaslax (StackOpData* D) +/* Optimize the tosaslax sequence */ +{ + return Opt_tosshift (D, "aslaxy"); +} + + + +static unsigned Opt_tosasrax (StackOpData* D) +/* Optimize the tosasrax sequence */ +{ + return Opt_tosshift (D, "asraxy"); +} + + + +static unsigned Opt_toseqax (StackOpData* D) +/* Optimize the toseqax sequence */ +{ + return Opt_toseqax_tosneax (D, "booleq"); +} + + + +static unsigned Opt_tosgeax (StackOpData* D) +/* Optimize the tosgeax sequence */ +{ + CodeEntry* X; + CodeLabel* L; + + /* Inline the sbc */ + D->IP = D->OpIndex+1; + + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); + + /* Add code for low operand */ + AddOpLow (D, OP65_CMP, &D->Rhs); + + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); + + /* eor #$80 */ + X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + L = CS_GenLabel (D->Code, X); + + /* Insert a bvs L before the eor insn */ + X = NewCodeEntry (OP65_BVS, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP - 2); + ++D->IP; + + /* lda #$00 */ + X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* ldx #$00 */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* rol a */ + X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Remove the push and the call to the tosgeax function */ + RemoveRemainders (D); + + /* We changed the sequence */ + return 1; +} + + + +static unsigned Opt_tosltax (StackOpData* D) +/* Optimize the tosltax sequence */ +{ + CodeEntry* X; + CodeLabel* L; + + + /* Inline the compare */ + D->IP = D->OpIndex+1; + + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); + + /* Add code for low operand */ + AddOpLow (D, OP65_CMP, &D->Rhs); + + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); + + /* eor #$80 */ + X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* asl a */ + X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + L = CS_GenLabel (D->Code, X); + + /* Insert a bvc L before the eor insn */ + X = NewCodeEntry (OP65_BVC, AM65_BRA, L->Name, L, D->OpEntry->LI); + InsertEntry (D, X, D->IP - 2); + ++D->IP; + + /* lda #$00 */ + X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* ldx #$00 */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* rol a */ + X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+4); + /* Remove the push and the call to the tosltax function */ + RemoveRemainders (D); - X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+5); + /* We changed the sequence */ + return 1; +} - } else { - /* Loop using an explicit compare */ - X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+1); - X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+2); +static unsigned Opt_tosneax (StackOpData* D) +/* Optimize the tosneax sequence */ +{ + return Opt_toseqax_tosneax (D, "boolne"); +} - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+3); - L = CS_GenLabel (D->Code, X); - X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+4); - Arg = MakeHexArg (D->OpEntry->RI->In.RegA); - X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+5); +static unsigned Opt_tosorax (StackOpData* D) +/* Optimize the tosorax sequence */ +{ + /* Store the value into the zeropage instead of pushing it */ + ReplacePushByStore (D); - X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+6); - } + /* Inline the or, low byte */ + D->IP = D->OpIndex + 1; + AddOpLow (D, OP65_ORA, &D->Lhs); - } + /* High byte */ + AddOpHigh (D, OP65_ORA, &D->Lhs, 1); - /* Remove the push and the call to the __bzero function */ - RemovePushAndOp (D); + /* Remove the push and the call to the tosorax function */ + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -443,66 +1438,46 @@ static unsigned Opt___bzero (StackOpData* D) -static unsigned Opt_staspidx (StackOpData* D) -/* Optimize the staspidx sequence if possible */ +static unsigned Opt_tosshlax (StackOpData* D) +/* Optimize the tosshlax sequence */ { - CodeEntry* X; - - /* Check if we're using a register variable */ - if (!IsRegVar (D)) { - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); - } + return Opt_tosshift (D, "shlaxy"); +} - /* Replace the store subroutine call by a direct op */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+1); - /* Remove the push and the call to the staspidx function */ - RemovePushAndOp (D); - /* We changed the sequence */ - return 1; +static unsigned Opt_tosshrax (StackOpData* D) +/* Optimize the tosshrax sequence */ +{ + return Opt_tosshift (D, "shraxy"); } -static unsigned Opt_staxspidx (StackOpData* D) -/* Optimize the staxspidx sequence if possible */ +static unsigned Opt_tossubax (StackOpData* D) +/* Optimize the tossubax sequence. Note: subtraction is not commutative! */ { - CodeEntry* X; + CodeEntry* X; - /* Check if we're using a register variable */ - if (!IsRegVar (D)) { - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); - } - /* Inline the store */ - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+1); - if (RegValIsKnown (D->OpEntry->RI->In.RegY)) { - /* Value of Y is known */ - const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1); - X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); - } else { - X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); - } - InsertEntry (D, X, D->OpIndex+2); - if (RegValIsKnown (D->OpEntry->RI->In.RegX)) { - /* Value of X is known */ - const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX); - X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI); - } else { - /* Value unknown */ - X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI); - } - InsertEntry (D, X, D->OpIndex+3); - X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->OpIndex+4); + /* Inline the sbc */ + D->IP = D->OpIndex+1; - /* Remove the push and the call to the staxspidx function */ - RemovePushAndOp (D); + /* sec */ + X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); + + /* Add code for low operand */ + AddOpLow (D, OP65_SBC, &D->Rhs); + + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 1); + + /* Remove the push and the call to the tossubax function */ + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -510,123 +1485,81 @@ static unsigned Opt_staxspidx (StackOpData* D) -static unsigned Opt_tosaddax (StackOpData* D) -/* Optimize the tosaddax sequence if possible */ +static unsigned Opt_tosugeax (StackOpData* D) +/* Optimize the tosugeax sequence */ { CodeEntry* X; - CodeEntry* N; - /* We need the entry behind the add */ - CHECK (D->NextEntry != 0); - /* Check if the X register is known and zero when the add is done, and - * if the add is followed by - * - * ldy #$00 - * jsr ldauidx ; or ldaidx - * - * If this is true, the addition does actually add an offset to a pointer - * before it is dereferenced. Since both subroutines take an offset in Y, - * we can pass the offset (instead of #$00) and remove the addition - * alltogether. - */ - if (D->OpEntry->RI->In.RegX == 0 && - D->NextEntry->OPC == OP65_LDY && - CE_IsKnownImm (D->NextEntry, 0) && - !CE_HasLabel (D->NextEntry) && - (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 && - (CE_IsCallTo (N, "ldauidx") || - CE_IsCallTo (N, "ldaidx"))) { + /* Inline the sbc */ + D->IP = D->OpIndex+1; - int Signed = (strcmp (N->Arg, "ldaidx") == 0); + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); + /* Add code for low operand */ + AddOpLow (D, OP65_CMP, &D->Rhs); - /* Replace the ldy by a tay. Be sure to create the new entry before - * deleting the ldy, since we will reference the line info from this - * insn. - */ - X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI); - DelEntry (D, D->OpIndex + 1); - InsertEntry (D, X, D->OpIndex + 1); + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); - /* Replace the call to ldaidx/ldauidx. Since X is already zero, and - * the ptr is in the zero page location, we just need to load from - * the pointer, and fix X in case of ldaidx. - */ - X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI); - DelEntry (D, D->OpIndex + 2); - InsertEntry (D, X, D->OpIndex + 2); - if (Signed) { + /* lda #$00 */ + X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - CodeLabel* L; + /* ldx #$00 */ + X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - /* Add sign extension - N is unused now */ - N = CS_GetNextEntry (D->Code, D->OpIndex + 2); - CHECK (N != 0); - L = CS_GenLabel (D->Code, N); + /* rol a */ + X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI); - InsertEntry (D, X, D->OpIndex + 3); + /* Remove the push and the call to the tosugeax function */ + RemoveRemainders (D); - X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI); - InsertEntry (D, X, D->OpIndex + 4); - } + /* We changed the sequence */ + return 1; +} - } else { - /* Check the entry before the push. If it's a lda instruction with an - * addressing mode that allows us to replace it, we may use this - * location for the op and must not save the value in the zero page - * location. - */ - CheckDirectOp (D); - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); +static unsigned Opt_tosugtax (StackOpData* D) +/* Optimize the tosugtax sequence */ +{ + CodeEntry* X; - /* Inline the add */ - D->IP = D->OpIndex+1; - X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - /* Low byte */ - AddOpLow (D, OP65_ADC); + /* Inline the sbc */ + D->IP = D->OpIndex+1; - /* High byte */ - if (D->PushEntry->RI->In.RegX == 0) { - /* The high byte is the value in X plus the carry */ - CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry); - X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } else if (D->OpEntry->RI->In.RegX == 0) { - /* The high byte is that of the first operand plus carry */ - CodeLabel* L; - if (RegValIsKnown (D->PushEntry->RI->In.RegX)) { - /* Value of first op high byte is known */ - const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); - } else { - /* Value of first op high byte is unknown */ - X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); - } - InsertEntry (D, X, D->IP++); - L = CS_GenLabel (D->Code, D->NextEntry); - X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } else { - /* High byte is unknown */ - AddOpHigh (D, OP65_ADC); - } - } + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Remove the push and the call to the tosaddax function */ - RemovePushAndOp (D); + /* sec */ + X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Add code for low operand */ + AddOpLow (D, OP65_SBC, &D->Rhs); + + /* We need the zero flag, so remember the immediate result */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); + + /* Set Z flag */ + X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Transform to boolean */ + X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolugt", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Remove the push and the call to the operator function */ + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -634,37 +1567,42 @@ static unsigned Opt_tosaddax (StackOpData* D) -static unsigned Opt_tosandax (StackOpData* D) -/* Optimize the tosandax sequence if possible */ +static unsigned Opt_tosuleax (StackOpData* D) +/* Optimize the tosuleax sequence */ { CodeEntry* X; - /* Check the entry before the push. If it's a lda instruction with an - * addressing mode that allows us to replace it, we may use this - * location for the op and must not save the value in the zero page - * location. - */ - CheckDirectOp (D); - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); + /* Inline the sbc */ + D->IP = D->OpIndex+1; - /* Inline the and, low byte */ - D->IP = D->OpIndex + 1; - AddOpLow (D, OP65_AND); + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* High byte */ - if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) { - /* The high byte is zero */ - X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } else { - /* High byte is unknown */ - AddOpHigh (D, OP65_AND); - } + /* sec */ + X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); - /* Remove the push and the call to the tosandax function */ - RemovePushAndOp (D); + /* Add code for low operand */ + AddOpLow (D, OP65_SBC, &D->Rhs); + + /* We need the zero flag, so remember the immediate result */ + X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); + + /* Set Z flag */ + X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Transform to boolean */ + X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolule", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Remove the push and the call to the operator function */ + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -672,40 +1610,30 @@ static unsigned Opt_tosandax (StackOpData* D) -static unsigned Opt_tosorax (StackOpData* D) -/* Optimize the tosorax sequence if possible */ +static unsigned Opt_tosultax (StackOpData* D) +/* Optimize the tosultax sequence */ { CodeEntry* X; - /* Check the entry before the push. If it's a lda instruction with an - * addressing mode that allows us to replace it, we may use this - * location for the op and must not save the value in the zero page - * location. - */ - CheckDirectOp (D); - /* Store the value into the zeropage instead of pushing it */ - ReplacePushByStore (D); + /* Inline the sbc */ + D->IP = D->OpIndex+1; - /* Inline the or, low byte */ - D->IP = D->OpIndex + 1; - AddOpLow (D, OP65_ORA); + /* Must be true because of OP_RHS_LOAD */ + CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* High byte */ - if (RegValIsKnown (D->PushEntry->RI->In.RegX) && - RegValIsKnown (D->OpEntry->RI->In.RegX)) { - /* Both values known, precalculate the result */ - unsigned char Result = D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX; - const char* Arg = MakeHexArg (Result); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } else if (D->PushEntry->RI->In.RegX != 0) { - /* High byte is unknown */ - AddOpHigh (D, OP65_ORA); - } + /* Add code for low operand */ + AddOpLow (D, OP65_CMP, &D->Rhs); - /* Remove the push and the call to the tosorax function */ - RemovePushAndOp (D); + /* Add code for high operand */ + AddOpHigh (D, OP65_SBC, &D->Rhs, 0); + + /* Transform to boolean */ + X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolult", 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + /* Remove the push and the call to the operator function */ + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -714,38 +1642,32 @@ static unsigned Opt_tosorax (StackOpData* D) static unsigned Opt_tosxorax (StackOpData* D) -/* Optimize the tosxorax sequence if possible */ +/* Optimize the tosxorax sequence */ { CodeEntry* X; - /* Check the entry before the push. If it's a lda instruction with an - * addressing mode that allows us to replace it, we may use this - * location for the op and must not save the value in the zero page - * location. - */ - CheckDirectOp (D); /* Store the value into the zeropage instead of pushing it */ ReplacePushByStore (D); /* Inline the xor, low byte */ D->IP = D->OpIndex + 1; - AddOpLow (D, OP65_EOR); + AddOpLow (D, OP65_EOR, &D->Lhs); /* High byte */ if (RegValIsKnown (D->PushEntry->RI->In.RegX) && RegValIsKnown (D->OpEntry->RI->In.RegX)) { - /* Both values known, precalculate the result */ - const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX); - X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); + /* Both values known, precalculate the result */ + const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX); + X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); } else if (D->PushEntry->RI->In.RegX != 0) { - /* High byte is unknown */ - AddOpHigh (D, OP65_EOR); + /* High byte is unknown */ + AddOpHigh (D, OP65_EOR, &D->Lhs, 1); } /* Remove the push and the call to the tosandax function */ - RemovePushAndOp (D); + RemoveRemainders (D); /* We changed the sequence */ return 1; @@ -754,36 +1676,32 @@ static unsigned Opt_tosxorax (StackOpData* D) /*****************************************************************************/ -/* Code */ +/* Code */ /*****************************************************************************/ -/* Flags for the functions */ -typedef enum { - STOP_NONE = 0x00, /* Nothing special */ - STOP_A_UNUSED = 0x01, /* Call only if a unused later */ - STOP_A_KNOWN = 0x02, /* Call only if A is known */ - STOP_X_ZERO = 0x04 /* Call only if X is zero */ -} STOP_FLAGS; - - -typedef unsigned (*OptFunc) (StackOpData* D); -typedef struct OptFuncDesc OptFuncDesc; -struct OptFuncDesc { - const char* Name; /* Name of the replaced runtime function */ - OptFunc Func; /* Function pointer */ - STOP_FLAGS Flags; /* Flags */ -}; - static const OptFuncDesc FuncTable[] = { - { "__bzero", Opt___bzero, STOP_X_ZERO | STOP_A_KNOWN }, - { "staspidx", Opt_staspidx, STOP_NONE }, - { "staxspidx", Opt_staxspidx, STOP_A_UNUSED }, - { "tosaddax", Opt_tosaddax, STOP_NONE }, - { "tosandax", Opt_tosandax, STOP_NONE }, - { "tosorax", Opt_tosorax, STOP_NONE }, - { "tosxorax", Opt_tosxorax, STOP_NONE }, + { "__bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN }, + { "staspidx", Opt_staspidx, REG_NONE, OP_NONE }, + { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE }, + { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE }, + { "tosandax", Opt_tosandax, REG_NONE, OP_NONE }, + { "tosaslax", Opt_tosaslax, REG_NONE, OP_NONE }, + { "tosasrax", Opt_tosasrax, REG_NONE, OP_NONE }, + { "toseqax", Opt_toseqax, REG_NONE, OP_NONE }, + { "tosgeax", Opt_tosgeax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosltax", Opt_tosltax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosneax", Opt_tosneax, REG_NONE, OP_NONE }, + { "tosorax", Opt_tosorax, REG_NONE, OP_NONE }, + { "tosshlax", Opt_tosshlax, REG_NONE, OP_NONE }, + { "tosshrax", Opt_tosshrax, REG_NONE, OP_NONE }, + { "tossubax", Opt_tossubax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosugeax", Opt_tosugeax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosugtax", Opt_tosugtax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosuleax", Opt_tosuleax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosultax", Opt_tosultax, REG_NONE, OP_RHS_LOAD_DIRECT }, + { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE }, }; #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0])) @@ -792,15 +1710,15 @@ static const OptFuncDesc FuncTable[] = { static int CmpFunc (const void* Key, const void* Func) /* Compare function for bsearch */ { - return strcmp (Key, ((const OptFuncDesc*) Func)->Name); + return strcmp (Key, ((const OptFuncDesc*) Func)->Name); } static const OptFuncDesc* FindFunc (const char* Name) /* Find the function with the given name. Return a pointer to the table entry - * or NULL if the function was not found. - */ +** or NULL if the function was not found. +*/ { return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc); } @@ -817,13 +1735,53 @@ static int CmpHarmless (const void* Key, const void* Entry) static int HarmlessCall (const char* Name) /* Check if this is a call to a harmless subroutine that will not interrupt - * the pushax/op sequence when encountered. - */ +** the pushax/op sequence when encountered. +*/ { - static const char* Tab[] = { + static const char* const Tab[] = { + "aslax1", + "aslax2", + "aslax3", + "aslax4", + "aslaxy", + "asrax1", + "asrax2", + "asrax3", + "asrax4", + "asraxy", + "bnegax", + "complax", + "decax1", + "decax2", + "decax3", + "decax4", + "decax5", + "decax6", + "decax7", + "decax8", + "decaxy", + "incax1", + "incax2", + "incax3", + "incax4", + "incax5", + "incax6", + "incax7", + "incax8", + "incaxy", "ldaxidx", "ldaxysp", "negax", + "shlax1", + "shlax2", + "shlax3", + "shlax4", + "shlaxy", + "shrax1", + "shrax2", + "shrax3", + "shrax4", + "shraxy", }; void* R = bsearch (Name, @@ -836,8 +1794,96 @@ static int HarmlessCall (const char* Name) +static void ResetStackOpData (StackOpData* Data) +/* Reset the given data structure */ +{ + Data->OptFunc = 0; + Data->ZPUsage = REG_NONE; + + ClearLoadInfo (&Data->Lhs); + ClearLoadInfo (&Data->Rhs); + + Data->PushIndex = -1; + Data->OpIndex = -1; +} + + + +static int PreCondOk (StackOpData* D) +/* Check if the preconditions for a call to the optimizer subfunction are +** satisfied. As a side effect, this function will also choose the zero page +** register to use. +*/ +{ + /* Check the flags */ + unsigned UnusedRegs = D->OptFunc->UnusedRegs; + if (UnusedRegs != REG_NONE && + (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) { + /* Cannot optimize */ + return 0; + } + if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 && + RegValIsUnknown (D->OpEntry->RI->In.RegA)) { + /* Cannot optimize */ + return 0; + } + if ((D->OptFunc->Flags & OP_X_ZERO) != 0 && + D->OpEntry->RI->In.RegX != 0) { + /* Cannot optimize */ + return 0; + } + if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0) { + if (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0) { + /* Cannot optimize */ + return 0; + } else if ((D->OptFunc->Flags & OP_LHS_LOAD_DIRECT) != 0) { + if ((D->Lhs.A.Flags & D->Lhs.X.Flags & LI_DIRECT) == 0) { + /* Cannot optimize */ + return 0; + } + } + } + if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0) { + if (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0) { + /* Cannot optimize */ + return 0; + } else if ((D->OptFunc->Flags & OP_RHS_LOAD_DIRECT) != 0) { + if ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) == 0) { + /* Cannot optimize */ + return 0; + } + } + } + if ((D->Rhs.A.Flags | D->Rhs.X.Flags) & LI_DUP_LOAD) { + /* Cannot optimize */ + return 0; + } + + /* Determine the zero page locations to use. We've tracked the used + ** ZP locations, so try to find some for us that are unused. + */ + if ((D->ZPUsage & REG_PTR1) == REG_NONE) { + D->ZPLo = "ptr1"; + D->ZPHi = "ptr1+1"; + } else if ((D->ZPUsage & REG_SREG) == REG_NONE) { + D->ZPLo = "sreg"; + D->ZPHi = "sreg+1"; + } else if ((D->ZPUsage & REG_PTR2) == REG_NONE) { + D->ZPLo = "ptr2"; + D->ZPHi = "ptr2+1"; + } else { + /* No registers available */ + return 0; + } + + /* Determine if we have a basic block */ + return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex); +} + + + /*****************************************************************************/ -/* Code */ +/* Code */ /*****************************************************************************/ @@ -845,161 +1891,231 @@ static int HarmlessCall (const char* Name) unsigned OptStackOps (CodeSeg* S) /* Optimize operations that take operands via the stack */ { - unsigned Changes = 0; /* Number of changes in one run */ - int InSeq = 0; /* Inside a sequence */ - unsigned Push = 0; /* Index of pushax */ - unsigned UsedRegs = 0; /* Zeropage registers used in sequence */ - unsigned I; + unsigned Changes = 0; /* Number of changes in one run */ + StackOpData Data; + int I; + int OldEntryCount; /* Old number of entries */ + unsigned UsedRegs = 0; /* Registers used */ + unsigned ChangedRegs = 0;/* Registers changed */ + + + enum { + Initialize, + Search, + FoundPush, + FoundOp + } State = Initialize; - /* Generate register info */ - CS_GenRegInfo (S); + /* Remember the code segment in the info struct */ + Data.Code = S; /* Look for a call to pushax followed by a call to some other function - * that takes it's first argument on the stack, and the second argument - * in the primary register. - * It depends on the code between the two if we can handle/transform the - * sequence, so check this code for the following list of things: - * - * - the range must be a basic block (one entry, one exit) - * - there may not be accesses to local variables with unknown - * offsets (because we have to adjust these offsets). - * - no subroutine calls - * - no jump labels - * - * Since we need a zero page register later, do also check the - * intermediate code for zero page use. - */ + ** that takes it's first argument on the stack, and the second argument + ** in the primary register. + ** It depends on the code between the two if we can handle/transform the + ** sequence, so check this code for the following list of things: + ** + ** - the range must be a basic block (one entry, one exit) + ** - there may not be accesses to local variables with unknown + ** offsets (because we have to adjust these offsets). + ** - no subroutine calls + ** - no jump labels + ** + ** Since we need a zero page register later, do also check the + ** intermediate code for zero page use. + */ I = 0; - while (I < CS_GetEntryCount (S)) { - - /* Get the next entry */ - CodeEntry* E = CS_GetEntry (S, I); - - /* Handling depends if we're inside a sequence or not */ - if (InSeq) { - - /* If we are using the stack, and we don't have "indirect Y" - * addressing mode, or the value of Y is unknown, or less than - * two, we cannot cope with this piece of code. Having an unknown - * value of Y means that we cannot correct the stack offset, while - * having an offset less than two means that the code works with - * the value on stack which is to be removed. - */ - if ((E->Use & REG_SP) != 0 && - (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) || - E->RI->In.RegY < 2)) { - - /* All this stuff is not allowed in a sequence */ - InSeq = 0; - - } else if (E->OPC == OP65_JSR) { - - /* Subroutine call: Check if this is one of our functions */ - const OptFuncDesc* F = FindFunc (E->Arg); - if (F) { - - StackOpData Data; - int PreCondOk = 1; - - /* Check the flags */ - if ((F->Flags & STOP_A_UNUSED) != 0 && RegAUsed (S, I+1)) { - /* Cannot optimize */ - PreCondOk = 0; - } else if ((F->Flags & STOP_A_KNOWN) != 0 && RegValIsUnknown (E->RI->In.RegA)) { - /* Cannot optimize */ - PreCondOk = 0; - } else if ((F->Flags & STOP_X_ZERO) != 0 && E->RI->In.RegX != 0) { - /* Cannot optimize */ - PreCondOk = 0; + while (I < (int)CS_GetEntryCount (S)) { + + /* Get the next entry */ + CodeEntry* E = CS_GetEntry (S, I); + + /* Actions depend on state */ + switch (State) { + + case Initialize: + ResetStackOpData (&Data); + UsedRegs = ChangedRegs = REG_NONE; + State = Search; + /* FALLTHROUGH */ + + case Search: + /* While searching, track register load insns, so we can tell + ** what is in a register once pushax is encountered. + */ + if (CE_HasLabel (E)) { + /* Currently we don't track across branches */ + ClearLoadInfo (&Data.Lhs); + } + if (CE_IsCallTo (E, "pushax")) { + Data.PushIndex = I; + State = FoundPush; + } else { + /* Track load insns */ + TrackLoads (&Data.Lhs, E, I); + } + break; + + case FoundPush: + /* We' found a pushax before. Search for a stack op that may + ** follow and in the meantime, track zeropage usage and check + ** for code that will disable us from translating the sequence. + */ + if (CE_HasLabel (E)) { + /* Currently we don't track across branches */ + ClearLoadInfo (&Data.Rhs); + } + if (E->OPC == OP65_JSR) { + + /* Subroutine call: Check if this is one of the functions, + ** we're going to replace. + */ + Data.OptFunc = FindFunc (E->Arg); + if (Data.OptFunc) { + /* Remember the op index and go on */ + Data.OpIndex = I; + Data.OpEntry = E; + State = FoundOp; + break; + } else if (!HarmlessCall (E->Arg)) { + /* A call to an unkown subroutine: We need to start + ** over after the last pushax. Note: This will also + ** happen if we encounter a call to pushax! + */ + I = Data.PushIndex; + State = Initialize; + break; + } else { + /* Track register usage */ + Data.ZPUsage |= (E->Use | E->Chg); + TrackLoads (&Data.Rhs, E, I); } - /* Determine the zero page locations to use */ - if (PreCondOk) { - UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2); - if ((UsedRegs & REG_SREG) == REG_NONE) { - /* SREG is available */ - Data.ZPLo = "sreg"; - Data.ZPHi = "sreg+1"; - } else if ((UsedRegs & REG_PTR1) == REG_NONE) { - Data.ZPLo = "ptr1"; - Data.ZPHi = "ptr1+1"; - } else if ((UsedRegs & REG_PTR2) == REG_NONE) { - Data.ZPLo = "ptr2"; - Data.ZPHi = "ptr2+1"; - } else { - /* No registers available */ - PreCondOk = 0; - } - } - - /* Determine if we have a basic block */ - if (PreCondOk) { - PreCondOk = CS_IsBasicBlock (S, Push, I); - } + } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) { + + /* Too dangerous - there may be a change of a variable + ** within the sequence. + */ + I = Data.PushIndex; + State = Initialize; + break; + + } else if ((E->Use & REG_SP) != 0 && + (E->AM != AM65_ZP_INDY || + RegValIsUnknown (E->RI->In.RegY) || + E->RI->In.RegY < 2)) { + + /* If we are using the stack, and we don't have "indirect Y" + ** addressing mode, or the value of Y is unknown, or less + ** than two, we cannot cope with this piece of code. Having + ** an unknown value of Y means that we cannot correct the + ** stack offset, while having an offset less than two means + ** that the code works with the value on stack which is to + ** be removed. + */ + I = Data.PushIndex; + State = Initialize; + break; - /* If preconditions are ok, call the optimizer function */ - if (PreCondOk) { - - /* Adjust stack offsets */ - Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2); - - /* Prepare the remainder of the data structure */ - Data.Code = S; - Data.Flags = 0; - Data.PushIndex = Push; - Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex); - Data.PushEntry = CS_GetEntry (S, Data.PushIndex); - Data.OpEntry = E; - Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex); - - /* Call the optimizer function */ - Changes += F->Func (&Data); - - /* Regenerate register info */ - CS_GenRegInfo (S); - } - - /* End of sequence */ - InSeq = 0; - - } else if (strcmp (E->Arg, "pushax") == 0) { - /* Restart the sequence */ - Push = I; - UsedRegs = REG_NONE; - } else if (HarmlessCall (E->Arg)) { - /* Track zeropage register usage */ - UsedRegs |= (E->Use | E->Chg); } else { - /* A call to an unkown subroutine ends the sequence */ - InSeq = 0; - } - - } else { - /* Other stuff: Track zeropage register usage */ - UsedRegs |= (E->Use | E->Chg); - } - - } else if (CE_IsCallTo (E, "pushax")) { - - /* This starts a sequence */ - Push = I; - UsedRegs = REG_NONE; - InSeq = 1; + /* Other stuff: Track register usage */ + Data.ZPUsage |= (E->Use | E->Chg); + TrackLoads (&Data.Rhs, E, I); + } + /* If the registers from the push (A/X) are used before they're + ** changed, we cannot change the sequence, because this would + ** with a high probability change the register contents. + */ + UsedRegs |= E->Use; + if ((UsedRegs & ~ChangedRegs) & REG_AX) { + I = Data.PushIndex; + State = Initialize; + break; + } + ChangedRegs |= E->Chg; + break; + + case FoundOp: + /* Track zero page location usage beyond this point */ + Data.ZPUsage |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2); + + /* Finalize the load info */ + FinalizeLoadInfo (&Data.Lhs, S); + FinalizeLoadInfo (&Data.Rhs, S); + + /* Check if the lhs loads from zeropage. If this is true, these + ** zero page locations have to be added to ZPUsage, because + ** they cannot be used for intermediate storage. In addition, + ** if one of these zero page locations is destroyed between + ** pushing the lhs and the actual operation, we cannot use the + ** original zero page locations for the final op, but must + ** use another ZP location to save them. + */ + ChangedRegs &= REG_ZP; + if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) { + Data.ZPUsage |= Data.Lhs.A.LoadEntry->Use; + if ((Data.Lhs.A.LoadEntry->Use & ChangedRegs) != 0) { + Data.Lhs.A.Flags &= ~(LI_DIRECT | LI_RELOAD_Y); + } + } + if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) { + Data.ZPUsage |= Data.Lhs.X.LoadEntry->Use; + if ((Data.Lhs.X.LoadEntry->Use & ChangedRegs) != 0) { + Data.Lhs.X.Flags &= ~(LI_DIRECT | LI_RELOAD_Y); + } + } + + /* Check the preconditions. If they aren't ok, reset the insn + ** pointer to the pushax and start over. We will loose part of + ** load tracking but at least a/x has probably lost between + ** pushax and here and will be tracked again when restarting. + */ + if (!PreCondOk (&Data)) { + I = Data.PushIndex; + State = Initialize; + break; + } + + /* Prepare the remainder of the data structure. */ + Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex); + Data.PushEntry = CS_GetEntry (S, Data.PushIndex); + Data.OpEntry = CS_GetEntry (S, Data.OpIndex); + Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex); + + /* Remember the current number of code lines */ + OldEntryCount = CS_GetEntryCount (S); + + /* Adjust stack offsets to account for the upcoming removal */ + AdjustStackOffset (&Data, 2); + + /* Regenerate register info, since AdjustStackOffset changed + ** the code + */ + CS_GenRegInfo (S); + + /* Call the optimizer function */ + Changes += Data.OptFunc->Func (&Data); + + /* Since the function may have added or deleted entries, + ** correct the index. + */ + I += CS_GetEntryCount (S) - OldEntryCount; + + /* Regenerate register info */ + CS_GenRegInfo (S); + + /* Done */ + State = Initialize; + continue; - } + } - /* Next entry */ - ++I; + /* Next entry */ + ++I; } - /* Free the register info */ - CS_FreeRegInfo (S); - /* Return the number of changes made */ return Changes; } - - -