1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2009 Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
48 /*****************************************************************************/
49 /* Load tracking data */
50 /*****************************************************************************/
54 /* LoadRegInfo flags set by DirectOp */
57 LI_DIRECT = 0x01, /* Direct op may be used */
58 LI_RELOAD_Y = 0x02, /* Reload index register Y */
59 LI_TRANSFER = 0x04, /* Loaded value is transfered */
60 LI_REMOVE = 0x08, /* Load may be removed */
63 /* Structure that tells us how to load the lhs values */
64 typedef struct LoadRegInfo LoadRegInfo;
66 LI_FLAGS Flags; /* Tells us how to load */
67 int LoadIndex; /* Index of load insn, -1 if invalid */
68 CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */
69 int XferIndex; /* Index of transfer insn */
70 CodeEntry* XferEntry; /* The actual transfer entry */
71 unsigned char Offs; /* Stack offset if data is on stack */
74 /* Now combined for both registers */
75 typedef struct LoadInfo LoadInfo;
77 LoadRegInfo A; /* Info for A register */
78 LoadRegInfo X; /* Info for X register */
79 LoadRegInfo Y; /* Info for Y register */
84 /*****************************************************************************/
86 /*****************************************************************************/
90 /* Flags for the functions */
92 OP_NONE = 0x00, /* Nothing special */
93 OP_A_KNOWN = 0x01, /* Value of A must be known */
94 OP_X_ZERO = 0x02, /* X must be zero */
95 OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */
96 OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */
97 OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */
98 OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */
101 /* Structure forward decl */
102 typedef struct StackOpData StackOpData;
104 /* Structure that describes an optimizer subfunction for a specific op */
105 typedef unsigned (*OptFunc) (StackOpData* D);
106 typedef struct OptFuncDesc OptFuncDesc;
108 const char* Name; /* Name of the replaced runtime function */
109 OptFunc Func; /* Function pointer */
110 unsigned UnusedRegs; /* Regs that must not be used later */
111 OP_FLAGS Flags; /* Flags */
114 /* Structure that holds the needed data */
116 CodeSeg* Code; /* Pointer to code segment */
117 unsigned Flags; /* Flags to remember things */
119 /* Pointer to optimizer subfunction description */
120 const OptFuncDesc* OptFunc;
122 /* ZP register usage inside the sequence */
125 /* Register load information for lhs and rhs */
129 /* Several indices of insns in the code segment */
130 int PushIndex; /* Index of call to pushax in codeseg */
131 int OpIndex; /* Index of actual operation */
133 /* Pointers to insns in the code segment */
134 CodeEntry* PrevEntry; /* Entry before the call to pushax */
135 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
136 CodeEntry* OpEntry; /* Pointer to entry with op */
137 CodeEntry* NextEntry; /* Entry after the op */
139 const char* ZPLo; /* Lo byte of zero page loc to use */
140 const char* ZPHi; /* Hi byte of zero page loc to use */
141 unsigned IP; /* Insertion point used by some routines */
146 /*****************************************************************************/
147 /* Load tracking code */
148 /*****************************************************************************/
152 static void ClearLoadRegInfo (LoadRegInfo* RI)
153 /* Clear a LoadRegInfo struct */
162 static void InvalidateLoadRegInfo (LoadRegInfo* RI)
163 /* Invalidate a LoadRegInfo struct */
171 static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S)
172 /* Prepare a LoadRegInfo struct for use */
174 /* Get the entries */
175 if (RI->LoadIndex >= 0) {
176 RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex);
180 if (RI->XferIndex >= 0) {
181 RI->XferEntry = CS_GetEntry (S, RI->XferIndex);
189 static void ClearLoadInfo (LoadInfo* LI)
190 /* Clear a LoadInfo struct */
192 ClearLoadRegInfo (&LI->A);
193 ClearLoadRegInfo (&LI->X);
194 ClearLoadRegInfo (&LI->Y);
199 static void AdjustLoadRegInfo (LoadRegInfo* RI, int Index, int Change)
200 /* Adjust a load register info struct after deleting or inserting an entry
204 CHECK (abs (Change) == 1);
207 if (Index < RI->LoadIndex) {
209 } else if (Index == RI->LoadIndex) {
210 /* Has been removed */
214 if (Index < RI->XferIndex) {
216 } else if (Index == RI->XferIndex) {
217 /* Has been removed */
223 if (Index <= RI->LoadIndex) {
226 if (Index <= RI->XferIndex) {
234 static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S)
235 /* Prepare a LoadInfo struct for use */
237 /* Get the entries */
238 FinalizeLoadRegInfo (&LI->A, S);
239 FinalizeLoadRegInfo (&LI->X, S);
240 FinalizeLoadRegInfo (&LI->Y, S);
245 static void AdjustLoadInfo (LoadInfo* LI, int Index, int Change)
246 /* Adjust a load info struct after deleting entry with a given index */
248 AdjustLoadRegInfo (&LI->A, Index, Change);
249 AdjustLoadRegInfo (&LI->X, Index, Change);
250 AdjustLoadRegInfo (&LI->Y, Index, Change);
255 static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I)
256 /* Track loads for a code entry */
258 if (E->Info & OF_LOAD) {
259 if (E->Chg & REG_A) {
261 LI->A.XferIndex = -1;
263 if (E->Chg & REG_X) {
265 LI->X.XferIndex = -1;
267 if (E->Chg & REG_Y) {
269 LI->Y.XferIndex = -1;
271 } else if (E->Info & OF_XFR) {
274 LI->X.LoadIndex = LI->A.LoadIndex;
278 LI->Y.LoadIndex = LI->A.LoadIndex;
282 LI->A.LoadIndex = LI->X.LoadIndex;
286 LI->A.LoadIndex = LI->Y.LoadIndex;
292 } else if (CE_IsCallTo (E, "ldaxysp")) {
293 /* Both registers set, Y changed */
295 LI->A.XferIndex = -1;
297 LI->X.XferIndex = -1;
298 InvalidateLoadRegInfo (&LI->Y);
300 if (E->Chg & REG_A) {
301 InvalidateLoadRegInfo (&LI->A);
303 if (E->Chg & REG_X) {
304 InvalidateLoadRegInfo (&LI->X);
306 if (E->Chg & REG_Y) {
307 InvalidateLoadRegInfo (&LI->Y);
314 /*****************************************************************************/
316 /*****************************************************************************/
320 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
321 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
322 * be adjusted by this function.
325 /* Insert the entry into the code segment */
326 CS_InsertEntry (D->Code, E, Index);
328 /* Adjust register loads if necessary */
329 AdjustLoadInfo (&D->Lhs, Index, 1);
330 AdjustLoadInfo (&D->Rhs, Index, 1);
332 /* Adjust the indices if necessary */
333 if (D->PushEntry && Index <= D->PushIndex) {
336 if (D->OpEntry && Index <= D->OpIndex) {
343 static void DelEntry (StackOpData* D, int Index)
344 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
345 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
348 /* Delete the entry from the code segment */
349 CS_DelEntry (D->Code, Index);
351 /* Adjust register loads if necessary */
352 AdjustLoadInfo (&D->Lhs, Index, -1);
353 AdjustLoadInfo (&D->Rhs, Index, -1);
355 /* Adjust the other indices if necessary */
356 if (Index < D->PushIndex) {
358 } else if (Index == D->PushIndex) {
361 if (Index < D->OpIndex) {
363 } else if (Index == D->OpIndex) {
370 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
371 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
372 * OpIndex is adjusted according to the insertions.
375 /* Walk over all entries */
376 int I = D->PushIndex + 1;
377 while (I < D->OpIndex) {
379 CodeEntry* E = CS_GetEntry (D->Code, I);
381 int NeedCorrection = 0;
382 if ((E->Use & REG_SP) != 0) {
384 /* Check for some things that should not happen */
385 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
386 CHECK (strcmp (E->Arg, "sp") == 0);
388 /* We need to correct this one */
391 } else if (CE_IsCallTo (E, "ldaxysp")) {
393 /* We need to correct this one */
398 if (NeedCorrection) {
400 /* Get the code entry before this one. If it's a LDY, adjust the
403 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
404 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
406 /* The Y load is just before the stack access, adjust it */
407 CE_SetNumArg (P, P->Num - Offs);
411 /* Insert a new load instruction before the stack access */
412 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
413 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
414 InsertEntry (D, X, I++);
418 /* If we need the value of Y later, be sure to reload it */
419 if (RegYUsed (D->Code, I+1)) {
420 const char* Arg = MakeHexArg (E->RI->In.RegY);
421 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
422 InsertEntry (D, X, I+1);
424 /* Skip this instruction in the next round */
433 /* If we have rhs load insns that load from stack, we'll have to adjust
434 * the offsets for these also.
436 if (D->Rhs.A.Flags & LI_RELOAD_Y) {
437 D->Rhs.A.Offs -= Offs;
439 if (D->Rhs.X.Flags & LI_RELOAD_Y) {
440 D->Rhs.X.Offs -= Offs;
446 static void CheckOneDirectOp (LoadRegInfo* LI, unsigned char Offs)
447 /* Check if the given entry is a lda instruction with an addressing mode
448 * that allows us to replace it by another operation (like ora). If so, we may
449 * use this location for the or and must not save the value in the zero
453 /* Get the load entry */
454 CodeEntry* E = LI->LoadEntry;
460 /* Check the load entry */
462 /* Must check the call first since addressing mode is ABS, so second
463 * "if" will catch otherwise.
465 if (CE_IsCallTo (E, "ldaxysp")) {
466 /* Same as single loads from stack. Since we must distinguish
467 * between A and X here, the necessary offset is passed to the
468 * function as a parameter.
470 LI->Offs = (unsigned char) E->RI->In.RegY - Offs;
471 LI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
472 } else if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
473 /* These insns are all ok and replaceable */
474 LI->Flags |= LI_DIRECT;
475 } else if (E->AM == AM65_ZP_INDY &&
476 RegValIsKnown (E->RI->In.RegY) &&
477 strcmp (E->Arg, "sp") == 0) {
478 /* A load from the stack with known offset is also ok, but in this
479 * case we must reload the index register later. Please note that
480 * a load indirect via other zero page locations is not ok, since
481 * these locations may change between the push and the actual
484 LI->Offs = (unsigned char) E->RI->In.RegY;
485 LI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
492 static void CheckDirectOp (StackOpData* D)
493 /* Check if the given entry is a lda instruction with an addressing mode
494 * that allows us to replace it by another operation (like ora). If so, we may
495 * use this location for the or and must not save the value in the zero
499 /* Check flags for all load instructions */
500 CheckOneDirectOp (&D->Lhs.A, 1);
501 CheckOneDirectOp (&D->Lhs.X, 0);
502 CheckOneDirectOp (&D->Rhs.A, 1);
503 CheckOneDirectOp (&D->Rhs.X, 0);
508 static void AddStoreA (StackOpData* D)
509 /* Add a store to zero page after the push insn */
511 CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
512 InsertEntry (D, X, D->PushIndex+1);
517 static void AddStoreX (StackOpData* D)
518 /* Add a store to zero page after the push insn */
520 CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
521 InsertEntry (D, X, D->PushIndex+1);
526 static void ReplacePushByStore (StackOpData* D)
527 /* Replace the call to the push subroutine by a store into the zero page
528 * location (actually, the push is not replaced, because we need it for
529 * later, but the name is still ok since the push will get removed at the
530 * end of each routine).
533 /* Store the value into the zeropage instead of pushing it. Check high
534 * byte first so that the store is later in A/X order.
536 if ((D->Lhs.X.Flags & LI_DIRECT) == 0) {
539 if ((D->Lhs.A.Flags & LI_DIRECT) == 0) {
546 static void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI)
547 /* Add an op for the low byte of an operator. This function honours the
548 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
549 * All code is inserted at the current insertion point.
554 if ((LI->A.Flags & LI_DIRECT) != 0) {
555 /* Op with a variable location. If the location is on the stack, we
556 * need to reload the Y register.
558 if ((LI->A.Flags & LI_RELOAD_Y) == 0) {
561 CodeEntry* LoadA = LI->A.LoadEntry;
562 X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
563 InsertEntry (D, X, D->IP++);
568 const char* Arg = MakeHexArg (LI->A.Offs);
569 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
570 InsertEntry (D, X, D->IP++);
573 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
574 InsertEntry (D, X, D->IP++);
578 /* In both cases, we can remove the load */
579 LI->A.Flags |= LI_REMOVE;
583 /* Op with temp storage */
584 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
585 InsertEntry (D, X, D->IP++);
592 static void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult)
593 /* Add an op for the high byte of an operator. Special cases (constant values
594 * or similar) have to be checked separately, the function covers only the
595 * generic case. Code is inserted at the insertion point.
602 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
603 InsertEntry (D, X, D->IP++);
607 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
608 InsertEntry (D, X, D->IP++);
610 if ((LI->X.Flags & LI_DIRECT) != 0) {
612 if ((LI->X.Flags & LI_RELOAD_Y) == 0) {
615 CodeEntry* LoadX = LI->X.LoadEntry;
616 X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
617 InsertEntry (D, X, D->IP++);
622 const char* Arg = MakeHexArg (LI->X.Offs);
623 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
624 InsertEntry (D, X, D->IP++);
627 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
628 InsertEntry (D, X, D->IP++);
631 /* In both cases, we can remove the load */
632 LI->X.Flags |= LI_REMOVE;
636 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
637 InsertEntry (D, X, D->IP++);
642 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
643 InsertEntry (D, X, D->IP++);
646 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
647 InsertEntry (D, X, D->IP++);
653 static void RemoveRegLoads (StackOpData* D, LoadInfo* LI)
654 /* Remove register load insns */
656 /* Both registers may be loaded with one insn, but DelEntry will in this
657 * case clear the other one.
659 if (LI->A.Flags & LI_REMOVE) {
660 if (LI->A.LoadIndex >= 0) {
661 DelEntry (D, LI->A.LoadIndex);
663 if (LI->A.XferIndex >= 0) {
664 DelEntry (D, LI->A.XferIndex);
667 if (LI->X.Flags & LI_REMOVE) {
668 if (LI->X.LoadIndex >= 0) {
669 DelEntry (D, LI->X.LoadIndex);
671 if (LI->X.XferIndex >= 0) {
672 DelEntry (D, LI->X.XferIndex);
679 static void RemoveRemainders (StackOpData* D)
680 /* Remove the code that is unnecessary after translation of the sequence */
682 /* Remove the register loads for lhs and rhs */
683 RemoveRegLoads (D, &D->Lhs);
684 RemoveRegLoads (D, &D->Rhs);
686 /* Remove the push and the operator routine */
687 DelEntry (D, D->OpIndex);
688 DelEntry (D, D->PushIndex);
693 static int IsRegVar (StackOpData* D)
694 /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi
695 * in the given StackOpData struct by the variable and return true. Otherwise
696 * leave D untouched and return false.
699 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
700 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
703 /* Must have both load insns */
704 if (LoadA == 0 || LoadX == 0) {
708 /* Must be loads from zp */
709 if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) {
713 /* Must be the same zp loc with high byte in X */
714 Len = strlen (LoadA->Arg);
715 if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 ||
716 strcmp (LoadX->Arg + Len, "+1") != 0) {
720 /* Use the zero page location directly */
721 D->ZPLo = LoadA->Arg;
722 D->ZPHi = LoadX->Arg;
728 /*****************************************************************************/
729 /* Actual optimization functions */
730 /*****************************************************************************/
734 static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer)
735 /* Optimize the toseqax and tosneax sequences. */
740 /* Create a call to the boolean transformer function and a label for this
741 * insn. This is needed for all variants. Other insns are inserted *before*
744 X = NewCodeEntry (OP65_JSR, AM65_ABS, BoolTransformer, 0, D->OpEntry->LI);
745 InsertEntry (D, X, D->OpIndex + 1);
746 L = CS_GenLabel (D->Code, X);
748 /* If the lhs is direct (but not stack relative), encode compares with lhs
749 * effectively reverting the order (which doesn't matter for ==).
751 if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
752 (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
754 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
755 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
757 D->IP = D->OpIndex+1;
760 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
761 InsertEntry (D, X, D->IP++);
764 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
765 InsertEntry (D, X, D->IP++);
768 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
769 InsertEntry (D, X, D->IP++);
771 /* Lhs load entries can be removed */
772 D->Lhs.X.Flags |= LI_REMOVE;
773 D->Lhs.A.Flags |= LI_REMOVE;
775 } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
776 (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
778 CodeEntry* LoadX = D->Rhs.X.LoadEntry;
779 CodeEntry* LoadA = D->Rhs.A.LoadEntry;
781 D->IP = D->OpIndex+1;
784 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
785 InsertEntry (D, X, D->IP++);
788 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
789 InsertEntry (D, X, D->IP++);
792 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
793 InsertEntry (D, X, D->IP++);
795 /* Rhs load entries can be removed */
796 D->Rhs.X.Flags |= LI_REMOVE;
797 D->Rhs.A.Flags |= LI_REMOVE;
799 } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 &&
800 (D->Rhs.X.Flags & LI_DIRECT) != 0) {
802 D->IP = D->OpIndex+1;
804 /* Add operand for low byte */
805 AddOpLow (D, OP65_CMP, &D->Rhs);
808 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
809 InsertEntry (D, X, D->IP++);
811 /* Add operand for high byte */
812 AddOpHigh (D, OP65_CMP, &D->Rhs, 0);
816 /* Save lhs into zeropage, then compare */
820 D->IP = D->OpIndex+1;
823 X = NewCodeEntry (OP65_CPX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
824 InsertEntry (D, X, D->IP++);
827 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
828 InsertEntry (D, X, D->IP++);
831 X = NewCodeEntry (OP65_CMP, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
832 InsertEntry (D, X, D->IP++);
836 /* Remove the push and the call to the tosgeax function */
837 RemoveRemainders (D);
839 /* We changed the sequence */
845 static unsigned Opt___bzero (StackOpData* D)
846 /* Optimize the __bzero sequence */
852 /* Check if we're using a register variable */
854 /* Store the value into the zeropage instead of pushing it */
859 /* If the return value of __bzero is used, we have to add code to reload
860 * a/x from the pointer variable.
862 if (RegAXUsed (D->Code, D->OpIndex+1)) {
863 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
864 InsertEntry (D, X, D->OpIndex+1);
865 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
866 InsertEntry (D, X, D->OpIndex+2);
869 /* X is always zero, A contains the size of the data area to zero.
870 * Note: A may be zero, in which case the operation is null op.
872 if (D->OpEntry->RI->In.RegA != 0) {
875 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
876 InsertEntry (D, X, D->OpIndex+1);
878 /* The value of A is known */
879 if (D->OpEntry->RI->In.RegA <= 0x81) {
881 /* Loop using the sign bit */
884 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
885 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
886 InsertEntry (D, X, D->OpIndex+2);
889 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
890 InsertEntry (D, X, D->OpIndex+3);
891 L = CS_GenLabel (D->Code, X);
894 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
895 InsertEntry (D, X, D->OpIndex+4);
898 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
899 InsertEntry (D, X, D->OpIndex+5);
903 /* Loop using an explicit compare */
906 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
907 InsertEntry (D, X, D->OpIndex+2);
910 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
911 InsertEntry (D, X, D->OpIndex+3);
912 L = CS_GenLabel (D->Code, X);
915 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
916 InsertEntry (D, X, D->OpIndex+4);
919 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
920 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
921 InsertEntry (D, X, D->OpIndex+5);
924 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
925 InsertEntry (D, X, D->OpIndex+6);
930 /* Remove the push and the call to the __bzero function */
931 RemoveRemainders (D);
933 /* We changed the sequence */
939 static unsigned Opt_staspidx (StackOpData* D)
940 /* Optimize the staspidx sequence */
944 /* Check if we're using a register variable */
946 /* Store the value into the zeropage instead of pushing it */
951 /* Replace the store subroutine call by a direct op */
952 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
953 InsertEntry (D, X, D->OpIndex+1);
955 /* Remove the push and the call to the staspidx function */
956 RemoveRemainders (D);
958 /* We changed the sequence */
964 static unsigned Opt_staxspidx (StackOpData* D)
965 /* Optimize the staxspidx sequence */
969 /* Check if we're using a register variable */
971 /* Store the value into the zeropage instead of pushing it */
976 /* Inline the store */
979 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
980 InsertEntry (D, X, D->OpIndex+1);
982 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
983 /* Value of Y is known */
984 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
985 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
987 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
989 InsertEntry (D, X, D->OpIndex+2);
991 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
992 /* Value of X is known */
993 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
994 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
997 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
999 InsertEntry (D, X, D->OpIndex+3);
1002 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1003 InsertEntry (D, X, D->OpIndex+4);
1005 /* If we remove staxspidx, we must restore the Y register to what the
1006 * function would return.
1008 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
1009 InsertEntry (D, X, D->OpIndex+5);
1011 /* Remove the push and the call to the staxspidx function */
1012 RemoveRemainders (D);
1014 /* We changed the sequence */
1020 static unsigned Opt_tosaddax (StackOpData* D)
1021 /* Optimize the tosaddax sequence */
1026 /* We need the entry behind the add */
1027 CHECK (D->NextEntry != 0);
1029 /* Check if the X register is known and zero when the add is done, and
1030 * if the add is followed by
1033 * jsr ldauidx ; or ldaidx
1035 * If this is true, the addition does actually add an offset to a pointer
1036 * before it is dereferenced. Since both subroutines take an offset in Y,
1037 * we can pass the offset (instead of #$00) and remove the addition
1040 if (D->OpEntry->RI->In.RegX == 0 &&
1041 D->NextEntry->OPC == OP65_LDY &&
1042 CE_IsKnownImm (D->NextEntry, 0) &&
1043 !CE_HasLabel (D->NextEntry) &&
1044 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
1045 (CE_IsCallTo (N, "ldauidx") ||
1046 CE_IsCallTo (N, "ldaidx"))) {
1048 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
1050 /* Store the value into the zeropage instead of pushing it */
1054 /* Replace the ldy by a tay. Be sure to create the new entry before
1055 * deleting the ldy, since we will reference the line info from this
1058 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
1059 DelEntry (D, D->OpIndex + 1);
1060 InsertEntry (D, X, D->OpIndex + 1);
1062 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
1063 * the ptr is in the zero page location, we just need to load from
1064 * the pointer, and fix X in case of ldaidx.
1066 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
1067 DelEntry (D, D->OpIndex + 2);
1068 InsertEntry (D, X, D->OpIndex + 2);
1073 /* Add sign extension - N is unused now */
1074 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
1076 L = CS_GenLabel (D->Code, N);
1078 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
1079 InsertEntry (D, X, D->OpIndex + 3);
1081 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
1082 InsertEntry (D, X, D->OpIndex + 4);
1087 /* Store the value into the zeropage instead of pushing it */
1088 ReplacePushByStore (D);
1090 /* Inline the add */
1091 D->IP = D->OpIndex+1;
1094 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
1095 InsertEntry (D, X, D->IP++);
1098 AddOpLow (D, OP65_ADC, &D->Lhs);
1101 if (D->PushEntry->RI->In.RegX == 0) {
1103 /* The high byte is the value in X plus the carry */
1104 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
1107 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1108 InsertEntry (D, X, D->IP++);
1111 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1112 InsertEntry (D, X, D->IP++);
1114 } else if (D->OpEntry->RI->In.RegX == 0 &&
1115 (RegValIsKnown (D->PushEntry->RI->In.RegX) ||
1116 (D->Lhs.X.Flags & LI_RELOAD_Y) == 0)) {
1118 /* The high byte is that of the first operand plus carry */
1120 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
1121 /* Value of first op high byte is known */
1122 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
1123 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1125 /* Value of first op high byte is unknown. Load from ZP or
1128 if (D->Lhs.X.Flags & LI_DIRECT) {
1129 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
1130 X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
1132 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
1135 InsertEntry (D, X, D->IP++);
1138 L = CS_GenLabel (D->Code, D->NextEntry);
1139 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1140 InsertEntry (D, X, D->IP++);
1143 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1144 InsertEntry (D, X, D->IP++);
1146 /* High byte is unknown */
1147 AddOpHigh (D, OP65_ADC, &D->Lhs, 1);
1151 /* Remove the push and the call to the tosaddax function */
1152 RemoveRemainders (D);
1154 /* We changed the sequence */
1160 static unsigned Opt_tosandax (StackOpData* D)
1161 /* Optimize the tosandax sequence */
1163 /* Store the value into the zeropage instead of pushing it */
1164 ReplacePushByStore (D);
1166 /* Inline the and, low byte */
1167 D->IP = D->OpIndex + 1;
1168 AddOpLow (D, OP65_AND, &D->Lhs);
1171 AddOpHigh (D, OP65_AND, &D->Lhs, 1);
1173 /* Remove the push and the call to the tosandax function */
1174 RemoveRemainders (D);
1176 /* We changed the sequence */
1182 static unsigned Opt_toseqax (StackOpData* D)
1183 /* Optimize the toseqax sequence */
1185 return Opt_toseqax_tosneax (D, "booleq");
1190 static unsigned Opt_tosgeax (StackOpData* D)
1191 /* Optimize the tosgeax sequence */
1196 /* Inline the sbc */
1197 D->IP = D->OpIndex+1;
1199 /* Must be true because of OP_RHS_LOAD */
1200 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1202 /* Add code for low operand */
1203 AddOpLow (D, OP65_CMP, &D->Rhs);
1205 /* Add code for high operand */
1206 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1209 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1210 InsertEntry (D, X, D->IP++);
1213 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1214 InsertEntry (D, X, D->IP++);
1215 L = CS_GenLabel (D->Code, X);
1217 /* Insert a bvs L before the eor insn */
1218 X = NewCodeEntry (OP65_BVS, AM65_BRA, L->Name, L, D->OpEntry->LI);
1219 InsertEntry (D, X, D->IP - 2);
1223 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1224 InsertEntry (D, X, D->IP++);
1227 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1228 InsertEntry (D, X, D->IP++);
1231 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1232 InsertEntry (D, X, D->IP++);
1234 /* Remove the push and the call to the tosgeax function */
1235 RemoveRemainders (D);
1237 /* We changed the sequence */
1243 static unsigned Opt_tosltax (StackOpData* D)
1244 /* Optimize the tosltax sequence */
1250 /* Inline the sbc */
1251 D->IP = D->OpIndex+1;
1253 /* Must be true because of OP_RHS_LOAD */
1254 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1256 /* Add code for low operand */
1257 AddOpLow (D, OP65_CMP, &D->Rhs);
1259 /* Add code for high operand */
1260 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1263 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1264 InsertEntry (D, X, D->IP++);
1267 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1268 InsertEntry (D, X, D->IP++);
1269 L = CS_GenLabel (D->Code, X);
1271 /* Insert a bvc L before the eor insn */
1272 X = NewCodeEntry (OP65_BVC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1273 InsertEntry (D, X, D->IP - 2);
1277 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1278 InsertEntry (D, X, D->IP++);
1281 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1282 InsertEntry (D, X, D->IP++);
1285 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1286 InsertEntry (D, X, D->IP++);
1288 /* Remove the push and the call to the tosltax function */
1289 RemoveRemainders (D);
1291 /* We changed the sequence */
1297 static unsigned Opt_tosneax (StackOpData* D)
1298 /* Optimize the tosneax sequence */
1300 return Opt_toseqax_tosneax (D, "boolne");
1305 static unsigned Opt_tosorax (StackOpData* D)
1306 /* Optimize the tosorax sequence */
1308 /* Store the value into the zeropage instead of pushing it */
1309 ReplacePushByStore (D);
1311 /* Inline the or, low byte */
1312 D->IP = D->OpIndex + 1;
1313 AddOpLow (D, OP65_ORA, &D->Lhs);
1316 AddOpHigh (D, OP65_ORA, &D->Lhs, 1);
1318 /* Remove the push and the call to the tosorax function */
1319 RemoveRemainders (D);
1321 /* We changed the sequence */
1327 static unsigned Opt_tossubax (StackOpData* D)
1328 /* Optimize the tossubax sequence. Note: subtraction is not commutative! */
1333 /* Inline the sbc */
1334 D->IP = D->OpIndex+1;
1337 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1338 InsertEntry (D, X, D->IP++);
1340 /* Must be true because of OP_RHS_LOAD */
1341 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1343 /* Add code for low operand */
1344 AddOpLow (D, OP65_SBC, &D->Rhs);
1346 /* Add code for high operand */
1347 AddOpHigh (D, OP65_SBC, &D->Rhs, 1);
1349 /* Remove the push and the call to the tossubax function */
1350 RemoveRemainders (D);
1352 /* We changed the sequence */
1358 static unsigned Opt_tosugeax (StackOpData* D)
1359 /* Optimize the tosugeax sequence */
1364 /* Inline the sbc */
1365 D->IP = D->OpIndex+1;
1367 /* Must be true because of OP_RHS_LOAD */
1368 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1370 /* Add code for low operand */
1371 AddOpLow (D, OP65_CMP, &D->Rhs);
1373 /* Add code for high operand */
1374 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1377 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1378 InsertEntry (D, X, D->IP++);
1381 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1382 InsertEntry (D, X, D->IP++);
1385 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1386 InsertEntry (D, X, D->IP++);
1388 /* Remove the push and the call to the tosugeax function */
1389 RemoveRemainders (D);
1391 /* We changed the sequence */
1397 static unsigned Opt_tosugtax (StackOpData* D)
1398 /* Optimize the tosugtax sequence */
1403 /* Inline the sbc */
1404 D->IP = D->OpIndex+1;
1406 /* Must be true because of OP_RHS_LOAD */
1407 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1410 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1411 InsertEntry (D, X, D->IP++);
1413 /* Add code for low operand */
1414 AddOpLow (D, OP65_SBC, &D->Rhs);
1416 /* We need the zero flag, so remember the immediate result */
1417 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1418 InsertEntry (D, X, D->IP++);
1420 /* Add code for high operand */
1421 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1424 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1425 InsertEntry (D, X, D->IP++);
1427 /* Transform to boolean */
1428 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolugt", 0, D->OpEntry->LI);
1429 InsertEntry (D, X, D->IP++);
1431 /* Remove the push and the call to the operator function */
1432 RemoveRemainders (D);
1434 /* We changed the sequence */
1440 static unsigned Opt_tosuleax (StackOpData* D)
1441 /* Optimize the tosuleax sequence */
1446 /* Inline the sbc */
1447 D->IP = D->OpIndex+1;
1449 /* Must be true because of OP_RHS_LOAD */
1450 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1453 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1454 InsertEntry (D, X, D->IP++);
1456 /* Add code for low operand */
1457 AddOpLow (D, OP65_SBC, &D->Rhs);
1459 /* We need the zero flag, so remember the immediate result */
1460 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1461 InsertEntry (D, X, D->IP++);
1463 /* Add code for high operand */
1464 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1467 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1468 InsertEntry (D, X, D->IP++);
1470 /* Transform to boolean */
1471 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolule", 0, D->OpEntry->LI);
1472 InsertEntry (D, X, D->IP++);
1474 /* Remove the push and the call to the operator function */
1475 RemoveRemainders (D);
1477 /* We changed the sequence */
1483 static unsigned Opt_tosultax (StackOpData* D)
1484 /* Optimize the tosultax sequence */
1489 /* Inline the sbc */
1490 D->IP = D->OpIndex+1;
1492 /* Must be true because of OP_RHS_LOAD */
1493 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1495 /* Add code for low operand */
1496 AddOpLow (D, OP65_CMP, &D->Rhs);
1498 /* Add code for high operand */
1499 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1501 /* Transform to boolean */
1502 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolult", 0, D->OpEntry->LI);
1503 InsertEntry (D, X, D->IP++);
1505 /* Remove the push and the call to the operator function */
1506 RemoveRemainders (D);
1508 /* We changed the sequence */
1514 static unsigned Opt_tosxorax (StackOpData* D)
1515 /* Optimize the tosxorax sequence */
1520 /* Store the value into the zeropage instead of pushing it */
1521 ReplacePushByStore (D);
1523 /* Inline the xor, low byte */
1524 D->IP = D->OpIndex + 1;
1525 AddOpLow (D, OP65_EOR, &D->Lhs);
1528 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
1529 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1530 /* Both values known, precalculate the result */
1531 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
1532 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1533 InsertEntry (D, X, D->IP++);
1534 } else if (D->PushEntry->RI->In.RegX != 0) {
1535 /* High byte is unknown */
1536 AddOpHigh (D, OP65_EOR, &D->Lhs, 1);
1539 /* Remove the push and the call to the tosandax function */
1540 RemoveRemainders (D);
1542 /* We changed the sequence */
1548 /*****************************************************************************/
1550 /*****************************************************************************/
1554 static const OptFuncDesc FuncTable[] = {
1555 { "__bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN },
1556 { "staspidx", Opt_staspidx, REG_NONE, OP_NONE },
1557 { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE },
1558 { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE },
1559 { "tosandax", Opt_tosandax, REG_NONE, OP_NONE },
1560 { "toseqax", Opt_toseqax, REG_NONE, OP_NONE },
1561 { "tosgeax", Opt_tosgeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1562 { "tosltax", Opt_tosltax, REG_NONE, OP_RHS_LOAD_DIRECT },
1563 { "tosneax", Opt_tosneax, REG_NONE, OP_NONE },
1564 { "tosorax", Opt_tosorax, REG_NONE, OP_NONE },
1565 { "tossubax", Opt_tossubax, REG_NONE, OP_RHS_LOAD_DIRECT },
1566 { "tosugeax", Opt_tosugeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1567 { "tosugtax", Opt_tosugtax, REG_NONE, OP_RHS_LOAD_DIRECT },
1568 { "tosuleax", Opt_tosuleax, REG_NONE, OP_RHS_LOAD_DIRECT },
1569 { "tosultax", Opt_tosultax, REG_NONE, OP_RHS_LOAD_DIRECT },
1570 { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE },
1572 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
1576 static int CmpFunc (const void* Key, const void* Func)
1577 /* Compare function for bsearch */
1579 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
1584 static const OptFuncDesc* FindFunc (const char* Name)
1585 /* Find the function with the given name. Return a pointer to the table entry
1586 * or NULL if the function was not found.
1589 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
1594 static int CmpHarmless (const void* Key, const void* Entry)
1595 /* Compare function for bsearch */
1597 return strcmp (Key, *(const char**)Entry);
1602 static int HarmlessCall (const char* Name)
1603 /* Check if this is a call to a harmless subroutine that will not interrupt
1604 * the pushax/op sequence when encountered.
1607 static const char* Tab[] = {
1630 void* R = bsearch (Name,
1632 sizeof (Tab) / sizeof (Tab[0]),
1640 static void ResetStackOpData (StackOpData* Data)
1641 /* Reset the given data structure */
1644 Data->UsedRegs = REG_NONE;
1646 ClearLoadInfo (&Data->Lhs);
1647 ClearLoadInfo (&Data->Rhs);
1649 Data->PushIndex = -1;
1655 static int PreCondOk (StackOpData* D)
1656 /* Check if the preconditions for a call to the optimizer subfunction are
1657 * satisfied. As a side effect, this function will also choose the zero page
1661 /* Check the flags */
1662 unsigned UnusedRegs = D->OptFunc->UnusedRegs;
1663 if (UnusedRegs != REG_NONE &&
1664 (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) {
1665 /* Cannot optimize */
1668 if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 &&
1669 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
1670 /* Cannot optimize */
1673 if ((D->OptFunc->Flags & OP_X_ZERO) != 0 &&
1674 D->OpEntry->RI->In.RegX != 0) {
1675 /* Cannot optimize */
1678 if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0) {
1679 if (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0) {
1680 /* Cannot optimize */
1682 } else if ((D->OptFunc->Flags & OP_LHS_LOAD_DIRECT) != 0) {
1683 if ((D->Lhs.A.Flags & D->Lhs.X.Flags & LI_DIRECT) == 0) {
1684 /* Cannot optimize */
1689 if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0) {
1690 if (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0) {
1691 /* Cannot optimize */
1693 } else if ((D->OptFunc->Flags & OP_RHS_LOAD_DIRECT) != 0) {
1694 if ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) == 0) {
1695 /* Cannot optimize */
1701 /* Determine the zero page locations to use */
1702 if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
1705 } else if ((D->UsedRegs & REG_SREG) == REG_NONE) {
1708 } else if ((D->UsedRegs & REG_PTR2) == REG_NONE) {
1712 /* No registers available */
1716 /* Determine if we have a basic block */
1717 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
1722 /*****************************************************************************/
1724 /*****************************************************************************/
1728 unsigned OptStackOps (CodeSeg* S)
1729 /* Optimize operations that take operands via the stack */
1731 unsigned Changes = 0; /* Number of changes in one run */
1740 } State = Initialize;
1743 /* Generate register info */
1746 /* Remember the code segment in the info struct */
1749 /* Look for a call to pushax followed by a call to some other function
1750 * that takes it's first argument on the stack, and the second argument
1751 * in the primary register.
1752 * It depends on the code between the two if we can handle/transform the
1753 * sequence, so check this code for the following list of things:
1755 * - the range must be a basic block (one entry, one exit)
1756 * - there may not be accesses to local variables with unknown
1757 * offsets (because we have to adjust these offsets).
1758 * - no subroutine calls
1761 * Since we need a zero page register later, do also check the
1762 * intermediate code for zero page use.
1765 while (I < CS_GetEntryCount (S)) {
1767 /* Get the next entry */
1768 CodeEntry* E = CS_GetEntry (S, I);
1770 /* Actions depend on state */
1774 ResetStackOpData (&Data);
1779 /* While searching, track register load insns, so we can tell
1780 * what is in a register once pushax is encountered.
1782 if (CE_HasLabel (E)) {
1783 /* Currently we don't track across branches */
1784 InvalidateLoadRegInfo (&Data.Lhs.A);
1785 InvalidateLoadRegInfo (&Data.Lhs.X);
1786 InvalidateLoadRegInfo (&Data.Lhs.Y);
1788 if (CE_IsCallTo (E, "pushax")) {
1792 /* Track load insns */
1793 TrackLoads (&Data.Lhs, E, I);
1798 /* We' found a pushax before. Search for a stack op that may
1799 * follow and in the meantime, track zeropage usage and check
1800 * for code that will disable us from translating the sequence.
1802 if (CE_HasLabel (E)) {
1803 /* Currently we don't track across branches */
1804 InvalidateLoadRegInfo (&Data.Rhs.A);
1805 InvalidateLoadRegInfo (&Data.Rhs.X);
1806 InvalidateLoadRegInfo (&Data.Rhs.Y);
1808 if (E->OPC == OP65_JSR) {
1810 /* Subroutine call: Check if this is one of the functions,
1811 * we're going to replace.
1813 Data.OptFunc = FindFunc (E->Arg);
1815 /* Remember the op index and go on */
1820 } else if (!HarmlessCall (E->Arg)) {
1821 /* A call to an unkown subroutine: We need to start
1822 * over after the last pushax. Note: This will also
1823 * happen if we encounter a call to pushax!
1829 /* Track register usage */
1830 Data.UsedRegs |= (E->Use | E->Chg);
1831 TrackLoads (&Data.Rhs, E, I);
1834 } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) {
1836 /* Too dangerous - there may be a change of a variable
1837 * within the sequence.
1843 } else if ((E->Use & REG_SP) != 0 &&
1844 (E->AM != AM65_ZP_INDY ||
1845 RegValIsUnknown (E->RI->In.RegY) ||
1846 E->RI->In.RegY < 2)) {
1848 /* If we are using the stack, and we don't have "indirect Y"
1849 * addressing mode, or the value of Y is unknown, or less
1850 * than two, we cannot cope with this piece of code. Having
1851 * an unknown value of Y means that we cannot correct the
1852 * stack offset, while having an offset less than two means
1853 * that the code works with the value on stack which is to
1861 /* Other stuff: Track register usage */
1862 Data.UsedRegs |= (E->Use | E->Chg);
1863 TrackLoads (&Data.Rhs, E, I);
1868 /* Track zero page location usage beyond this point */
1869 Data.UsedRegs |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
1871 /* Finalize the load info */
1872 FinalizeLoadInfo (&Data.Lhs, S);
1873 FinalizeLoadInfo (&Data.Rhs, S);
1875 /* Set flags for direct operations */
1876 CheckDirectOp (&Data);
1878 /* If the Lhs loads do load from zeropage, we have to include
1879 * them into UsedRegs registers used. The Rhs loads have already
1882 if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) {
1883 Data.UsedRegs |= Data.Lhs.A.LoadEntry->Use;
1885 if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) {
1886 Data.UsedRegs |= Data.Lhs.X.LoadEntry->Use;
1889 /* Check the preconditions. If they aren't ok, reset the insn
1890 * pointer to the pushax and start over. We will loose part of
1891 * load tracking but at least a/x has probably lost between
1892 * pushax and here and will be tracked again when restarting.
1894 if (!PreCondOk (&Data)) {
1900 /* Prepare the remainder of the data structure. */
1901 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
1902 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
1903 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
1904 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
1906 /* Adjust stack offsets to account for the upcoming removal */
1907 AdjustStackOffset (&Data, 2);
1909 /* Regenerate register info, since AdjustStackOffset changed
1914 /* Call the optimizer function */
1915 Changes += Data.OptFunc->Func (&Data);
1917 /* Regenerate register info */
1931 /* Free the register info */
1934 /* Return the number of changes made */