1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2009 Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
49 /*****************************************************************************/
50 /* Load tracking data */
51 /*****************************************************************************/
55 /* LoadRegInfo flags set by DirectOp. The values are bit flags that are
 * combined in LoadRegInfo.Flags.
 */
58 LI_DIRECT = 0x01, /* Direct op may be used */
59 LI_RELOAD_Y = 0x02, /* Reload index register Y (operand is on the stack) */
60 LI_REMOVE = 0x04, /* Load may be removed */
61 LI_DUP_LOAD = 0x08, /* Duplicate load (a load or xfer was tracked before) */
64 /* Structure that tells us how one register of an operand (lhs or rhs) is
 * loaded.
 */
65 typedef struct LoadRegInfo LoadRegInfo;
67 LI_FLAGS Flags; /* Tells us how to load */
68 int LoadIndex; /* Index of load insn, -1 if invalid */
69 CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */
70 int XferIndex; /* Index of transfer insn, negative if there is none */
71 CodeEntry* XferEntry; /* The actual transfer entry */
72 int Offs; /* Stack offset if data is on stack (LI_RELOAD_Y) */
75 /* Load information combined for the A, X and Y registers */
76 typedef struct LoadInfo LoadInfo;
78 LoadRegInfo A; /* Info for A register */
79 LoadRegInfo X; /* Info for X register */
80 LoadRegInfo Y; /* Info for Y register */
85 /*****************************************************************************/
87 /*****************************************************************************/
91 /* Flags for the optimizer subfunctions (stored in OptFuncDesc.Flags). Note
 * that the xxx_LOAD_DIRECT values include the bit of the matching xxx_LOAD
 * flag.
 */
93 OP_NONE = 0x00, /* Nothing special */
94 OP_A_KNOWN = 0x01, /* Value of A must be known */
95 OP_X_ZERO = 0x02, /* X must be zero */
96 OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */
97 OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */
98 OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */
99 OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */
102 /* Structure forward decl, needed for the OptFunc signature */
103 typedef struct StackOpData StackOpData;
105 /* Structure that describes an optimizer subfunction for a specific op */
106 typedef unsigned (*OptFunc) (StackOpData* D);
107 typedef struct OptFuncDesc OptFuncDesc;
109 const char* Name; /* Name of the replaced runtime function */
110 OptFunc Func; /* Pointer to the optimizer subfunction */
111 unsigned UnusedRegs; /* Regs that must not be used later */
112 OP_FLAGS Flags; /* OP_xxx flags */
115 /* Structure that holds the data needed to handle one push/op sequence */
117 CodeSeg* Code; /* Pointer to code segment */
118 unsigned Flags; /* Flags to remember things */
120 /* Pointer to optimizer subfunction description */
121 const OptFuncDesc* OptFunc;
123 /* ZP register usage inside the sequence */
126 /* Register load information for lhs and rhs */
130 /* Several indices of insns in the code segment */
131 int PushIndex; /* Index of call to pushax in codeseg */
132 int OpIndex; /* Index of actual operation */
134 /* Pointers to insns in the code segment */
135 CodeEntry* PrevEntry; /* Entry before the call to pushax */
136 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
137 CodeEntry* OpEntry; /* Pointer to entry with op */
138 CodeEntry* NextEntry; /* Entry after the op */
140 const char* ZPLo; /* Lo byte of zero page loc to use */
141 const char* ZPHi; /* Hi byte of zero page loc to use */
142 unsigned IP; /* Insertion point (code index), advanced by the routines using it */
147 /*****************************************************************************/
148 /* Load tracking code */
149 /*****************************************************************************/
153 static void ClearLoadRegInfo (LoadRegInfo* RI)
154 /* Clear a LoadRegInfo struct */
164 static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S)
165 /* Prepare a LoadRegInfo struct for use: the load and transfer indices are
 * resolved into entry pointers of code segment S. Only indices that are
 * not negative are looked up with CS_GetEntry.
 */
167 /* Get the entries */
168 if (RI->LoadIndex >= 0) {
169 RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex);
173 if (RI->XferIndex >= 0) {
174 RI->XferEntry = CS_GetEntry (S, RI->XferIndex);
182 static void ClearLoadInfo (LoadInfo* LI)
183 /* Clear a LoadInfo struct by clearing the infos for A, X and Y */
185 ClearLoadRegInfo (&LI->A);
186 ClearLoadRegInfo (&LI->X);
187 ClearLoadRegInfo (&LI->Y);
192 static void AdjustLoadRegInfo (LoadRegInfo* RI, int Index, int Change)
193 /* Adjust a load register info struct after deleting or inserting an entry
197 CHECK (abs (Change) == 1);
200 if (Index < RI->LoadIndex) {
202 } else if (Index == RI->LoadIndex) {
203 /* The load insn itself has been removed */
207 if (Index < RI->XferIndex) {
209 } else if (Index == RI->XferIndex) {
210 /* The transfer insn itself has been removed */
216 if (Index <= RI->LoadIndex) {
219 if (Index <= RI->XferIndex) {
227 static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S)
228 /* Prepare a LoadInfo struct for use by finalizing the infos for A, X and Y
 * against code segment S.
 */
230 /* Get the entries */
231 FinalizeLoadRegInfo (&LI->A, S);
232 FinalizeLoadRegInfo (&LI->X, S);
233 FinalizeLoadRegInfo (&LI->Y, S);
238 static void AdjustLoadInfo (LoadInfo* LI, int Index, int Change)
239 /* Adjust a load info struct after deleting or inserting an entry with a
 * given index. Index and Change are passed on unchanged for the A, X and
 * Y register infos.
 */
241 AdjustLoadRegInfo (&LI->A, Index, Change);
242 AdjustLoadRegInfo (&LI->X, Index, Change);
243 AdjustLoadRegInfo (&LI->Y, Index, Change);
248 static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I)
249 /* Track loads for a code entry. Load insns (OF_LOAD), register transfers
 * (OF_XFR) and calls to ldaxysp with a known value of Y are recognized
 * and recorded in LI.
 */
251 if (E->Info & OF_LOAD) {
255 /* Determine which register was loaded */
256 if (E->Chg & REG_A) {
258 } else if (E->Chg & REG_X) {
260 } else if (E->Chg & REG_Y) {
265 /* If we had a load or xfer op before, this is a duplicate load which
266 * can cause problems if it is encountered between the pushax and the op,
269 if (RI->LoadIndex >= 0 || RI->XferIndex >= 0) {
270 RI->Flags |= LI_DUP_LOAD;
273 /* Remember the load */
278 RI->Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
279 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
280 /* These insns are all ok and replaceable */
281 RI->Flags |= LI_DIRECT;
282 } else if (E->AM == AM65_ZP_INDY &&
283 RegValIsKnown (E->RI->In.RegY) &&
284 strcmp (E->Arg, "sp") == 0) {
285 /* A load from the stack with known offset is also ok, but in this
286 * case we must reload the index register later. Please note that
287 * a load indirect via other zero page locations is not ok, since
288 * these locations may change between the push and the actual
291 RI->Offs = (unsigned char) E->RI->In.RegY;
292 RI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
296 } else if (E->Info & OF_XFR) {
298 /* Determine source and target of the transfer and handle the TSX insn */
302 case OP65_TAX: Src = &LI->A; Tgt = &LI->X; break;
303 case OP65_TAY: Src = &LI->A; Tgt = &LI->Y; break;
304 case OP65_TXA: Src = &LI->X; Tgt = &LI->A; break;
305 case OP65_TYA: Src = &LI->Y; Tgt = &LI->A; break;
306 case OP65_TSX: ClearLoadRegInfo (&LI->X); return;
307 case OP65_TXS: return;
308 default: Internal ("Unknown XFR insn in TrackLoads");
311 /* If we had a load or xfer op before, this is a duplicate load which
312 * can cause problems if it is encountered between the pushax and the op,
315 if (Tgt->LoadIndex >= 0 || Tgt->XferIndex >= 0) {
316 Tgt->Flags |= LI_DUP_LOAD;
319 /* Transfer the data */
320 Tgt->LoadIndex = Src->LoadIndex;
322 Tgt->Offs = Src->Offs;
323 Tgt->Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
324 Tgt->Flags |= Src->Flags & (LI_DIRECT | LI_RELOAD_Y);
326 } else if (CE_IsCallTo (E, "ldaxysp") && RegValIsKnown (E->RI->In.RegY)) {
328 /* If we had a load or xfer op before, this is a duplicate load which
329 * can cause problems if it is encountered between the pushax and the op,
330 * so remember it for both registers involved.
332 if (LI->A.LoadIndex >= 0 || LI->A.XferIndex >= 0) {
333 LI->A.Flags |= LI_DUP_LOAD;
335 if (LI->X.LoadIndex >= 0 || LI->X.XferIndex >= 0) {
336 LI->X.Flags |= LI_DUP_LOAD;
339 /* Both registers set, Y changed */
341 LI->A.XferIndex = -1;
342 LI->A.Flags |= (LI_DIRECT | LI_RELOAD_Y);
343 LI->A.Offs = (unsigned char) E->RI->In.RegY - 1;
346 LI->X.XferIndex = -1;
347 LI->X.Flags |= (LI_DIRECT | LI_RELOAD_Y);
348 LI->X.Offs = (unsigned char) E->RI->In.RegY;
350 ClearLoadRegInfo (&LI->Y);
352 if (E->Chg & REG_A) {
353 ClearLoadRegInfo (&LI->A);
355 if (E->Chg & REG_X) {
356 ClearLoadRegInfo (&LI->X);
358 if (E->Chg & REG_Y) {
359 ClearLoadRegInfo (&LI->Y);
366 /*****************************************************************************/
368 /*****************************************************************************/
372 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
373 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
374 * be adjusted by this function.
377 /* Insert the entry into the code segment */
378 CS_InsertEntry (D->Code, E, Index);
380 /* Adjust the tracked lhs and rhs register loads for the insertion */
381 AdjustLoadInfo (&D->Lhs, Index, 1);
382 AdjustLoadInfo (&D->Rhs, Index, 1);
384 /* Adjust the push and op indices if necessary */
385 if (D->PushEntry && Index <= D->PushIndex) {
388 if (D->OpEntry && Index <= D->OpIndex) {
395 static void DelEntry (StackOpData* D, int Index)
396 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
397 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
400 /* Delete the entry from the code segment */
401 CS_DelEntry (D->Code, Index);
403 /* Adjust the tracked lhs and rhs register loads for the deletion */
404 AdjustLoadInfo (&D->Lhs, Index, -1);
405 AdjustLoadInfo (&D->Rhs, Index, -1);
407 /* Adjust the push and op indices if necessary */
408 if (Index < D->PushIndex) {
410 } else if (Index == D->PushIndex) {
413 if (Index < D->OpIndex) {
415 } else if (Index == D->OpIndex) {
422 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
423 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
424 * OpIndex is adjusted according to the insertions.
427 /* Walk over all entries between the push and the op (both excluded) */
428 int I = D->PushIndex + 1;
429 while (I < D->OpIndex) {
431 CodeEntry* E = CS_GetEntry (D->Code, I);
433 int NeedCorrection = 0;
434 if ((E->Use & REG_SP) != 0) {
436 /* Check for some things that should not happen */
437 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
438 CHECK (strcmp (E->Arg, "sp") == 0);
440 /* We need to correct this one */
443 } else if (CE_IsCallTo (E, "ldaxysp")) {
445 /* We need to correct this one */
450 if (NeedCorrection) {
452 /* Get the code entry before this one. If it's a LDY, adjust the
455 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
456 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
458 /* The Y load is just before the stack access, adjust it */
459 CE_SetNumArg (P, P->Num - Offs);
463 /* Insert a new load instruction before the stack access */
464 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
465 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
466 InsertEntry (D, X, I++);
470 /* If we need the value of Y later, be sure to reload it */
471 if (RegYUsed (D->Code, I+1)) {
472 const char* Arg = MakeHexArg (E->RI->In.RegY);
473 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
474 InsertEntry (D, X, I+1);
476 /* Skip this instruction in the next round */
485 /* If we have rhs load insns that load from stack, we'll have to adjust
486 * the offsets for these also.
488 if (D->Rhs.A.Flags & LI_RELOAD_Y) {
489 D->Rhs.A.Offs -= Offs;
491 if (D->Rhs.X.Flags & LI_RELOAD_Y) {
492 D->Rhs.X.Offs -= Offs;
498 static void AddStoreA (StackOpData* D)
499 /* Add a store of the A register to the zero page location D->ZPLo directly
 * after the push insn.
 */
501 CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
502 InsertEntry (D, X, D->PushIndex+1);
507 static void AddStoreX (StackOpData* D)
508 /* Add a store of the X register to the zero page location D->ZPHi directly
 * after the push insn.
 */
510 CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
511 InsertEntry (D, X, D->PushIndex+1);
516 static void ReplacePushByStore (StackOpData* D)
/* Only lhs registers that are not flagged LI_DIRECT are considered here. */
517 /* Replace the call to the push subroutine by a store into the zero page
518 * location (actually, the push is not replaced, because we need it for
519 * later, but the name is still ok since the push will get removed at the
520 * end of each routine).
523 /* Store the value into the zeropage instead of pushing it. Check high
524 * byte first so that the store is later in A/X order.
526 if ((D->Lhs.X.Flags & LI_DIRECT) == 0) {
529 if ((D->Lhs.A.Flags & LI_DIRECT) == 0) {
536 static void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI)
537 /* Add an op for the low byte of an operator. This function honours the
538 * LI_DIRECT and LI_RELOAD_Y flags and generates the necessary instructions.
539 * All code is inserted at the current insertion point.
544 if ((LI->A.Flags & LI_DIRECT) != 0) {
545 /* Op with a variable location. If the location is on the stack, we
546 * need to reload the Y register.
548 if ((LI->A.Flags & LI_RELOAD_Y) == 0) {
551 CodeEntry* LoadA = LI->A.LoadEntry;
552 X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
553 InsertEntry (D, X, D->IP++);
558 const char* Arg = MakeHexArg (LI->A.Offs);
559 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
560 InsertEntry (D, X, D->IP++);
563 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
564 InsertEntry (D, X, D->IP++);
568 /* In both cases, we can remove the load */
569 LI->A.Flags |= LI_REMOVE;
573 /* Op with temp storage (the zero page location D->ZPLo) */
574 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
575 InsertEntry (D, X, D->IP++);
582 static void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult)
583 /* Add an op for the high byte of an operator. Special cases (constant values
584 * or similar) have to be checked separately, the function covers only the
585 * generic case. Code is inserted at the insertion point D->IP.
592 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
593 InsertEntry (D, X, D->IP++);
597 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
598 InsertEntry (D, X, D->IP++);
600 if ((LI->X.Flags & LI_DIRECT) != 0) {
602 if ((LI->X.Flags & LI_RELOAD_Y) == 0) {
605 CodeEntry* LoadX = LI->X.LoadEntry;
606 X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
607 InsertEntry (D, X, D->IP++);
612 const char* Arg = MakeHexArg (LI->X.Offs);
613 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
614 InsertEntry (D, X, D->IP++);
617 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
618 InsertEntry (D, X, D->IP++);
621 /* In both cases, we can remove the load */
622 LI->X.Flags |= LI_REMOVE;
626 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
627 InsertEntry (D, X, D->IP++);
632 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
633 InsertEntry (D, X, D->IP++);
636 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
637 InsertEntry (D, X, D->IP++);
643 static void RemoveRegLoads (StackOpData* D, LoadInfo* LI)
644 /* Remove register load insns: for the A and X infos in LI that are flagged
 * LI_REMOVE, the load insn and the transfer insn are deleted if their
 * index is valid (not negative).
 */
646 /* Both registers may be loaded with one insn, but DelEntry will in this
647 * case clear the other one.
649 if (LI->A.Flags & LI_REMOVE) {
650 if (LI->A.LoadIndex >= 0) {
651 DelEntry (D, LI->A.LoadIndex);
653 if (LI->A.XferIndex >= 0) {
654 DelEntry (D, LI->A.XferIndex);
657 if (LI->X.Flags & LI_REMOVE) {
658 if (LI->X.LoadIndex >= 0) {
659 DelEntry (D, LI->X.LoadIndex);
661 if (LI->X.XferIndex >= 0) {
662 DelEntry (D, LI->X.XferIndex);
669 static void RemoveRemainders (StackOpData* D)
670 /* Remove the code that is unnecessary after translation of the sequence:
 * first the removable lhs and rhs register loads, then the insn at
 * D->OpIndex and finally the insn at D->PushIndex.
 */
672 /* Remove the register loads for lhs and rhs */
673 RemoveRegLoads (D, &D->Lhs);
674 RemoveRegLoads (D, &D->Rhs);
676 /* Remove the push and the operator routine */
677 DelEntry (D, D->OpIndex);
678 DelEntry (D, D->PushIndex);
683 static int IsRegVar (StackOpData* D)
684 /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi
685 * in the given StackOpData struct by the variable and return true. Otherwise
686 * leave D untouched and return false.
689 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
690 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
693 /* Must have both load insns */
694 if (LoadA == 0 || LoadX == 0) {
698 /* Must be loads from zp */
699 if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) {
703 /* Must be the same zp loc with high byte in X, which means that the
 * argument of the X load is the argument of the A load followed by "+1"
 */
704 Len = strlen (LoadA->Arg);
705 if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 ||
706 strcmp (LoadX->Arg + Len, "+1") != 0) {
710 /* Use the zero page location directly */
711 D->ZPLo = LoadA->Arg;
712 D->ZPHi = LoadX->Arg;
718 /*****************************************************************************/
719 /* Actual optimization functions */
720 /*****************************************************************************/
724 static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer)
725 /* Optimize the toseqax and tosneax sequences. BoolTransformer is the name of
 * the subroutine that is called (jsr) behind the inlined compare.
 */
730 /* Create a call to the boolean transformer function and a label for this
731 * insn. This is needed for all variants. Other insns are inserted *before*
734 X = NewCodeEntry (OP65_JSR, AM65_ABS, BoolTransformer, 0, D->OpEntry->LI);
735 InsertEntry (D, X, D->OpIndex + 1);
736 L = CS_GenLabel (D->Code, X);
738 /* If the lhs is direct (but not stack relative), encode compares with lhs
739 * effectively reversing the order (which doesn't matter for == or !=).
741 if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
742 (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
744 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
745 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
747 D->IP = D->OpIndex+1;
750 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
751 InsertEntry (D, X, D->IP++);
754 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
755 InsertEntry (D, X, D->IP++);
758 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
759 InsertEntry (D, X, D->IP++);
761 /* Lhs load entries can be removed */
762 D->Lhs.X.Flags |= LI_REMOVE;
763 D->Lhs.A.Flags |= LI_REMOVE;
765 } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
766 (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
768 CodeEntry* LoadX = D->Rhs.X.LoadEntry;
769 CodeEntry* LoadA = D->Rhs.A.LoadEntry;
771 D->IP = D->OpIndex+1;
774 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
775 InsertEntry (D, X, D->IP++);
778 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
779 InsertEntry (D, X, D->IP++);
782 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
783 InsertEntry (D, X, D->IP++);
785 /* Rhs load entries can be removed */
786 D->Rhs.X.Flags |= LI_REMOVE;
787 D->Rhs.A.Flags |= LI_REMOVE;
789 } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 &&
790 (D->Rhs.X.Flags & LI_DIRECT) != 0) {
792 D->IP = D->OpIndex+1;
794 /* Add operand for low byte */
795 AddOpLow (D, OP65_CMP, &D->Rhs);
798 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
799 InsertEntry (D, X, D->IP++);
801 /* Add operand for high byte */
802 AddOpHigh (D, OP65_CMP, &D->Rhs, 0);
806 /* Save lhs into zeropage, then compare */
810 D->IP = D->OpIndex+1;
813 X = NewCodeEntry (OP65_CPX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
814 InsertEntry (D, X, D->IP++);
817 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
818 InsertEntry (D, X, D->IP++);
821 X = NewCodeEntry (OP65_CMP, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
822 InsertEntry (D, X, D->IP++);
826 /* Remove the push and the call to the toseqax/tosneax function */
827 RemoveRemainders (D);
829 /* We changed the sequence */
835 static unsigned Opt_tosshift (StackOpData* D, const char* Name)
836 /* Optimize shift sequences. Name is the subroutine that is called (jsr)
 * after A has been transferred to Y and A/X have been reloaded with the
 * lhs.
 */
840 /* Store the value into the zeropage instead of pushing it */
841 ReplacePushByStore (D);
843 /* If the lhs is direct (but not stack relative), we can just reload the
846 if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
847 (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
849 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
850 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
852 /* Inline the shift */
853 D->IP = D->OpIndex+1;
856 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI);
857 InsertEntry (D, X, D->IP++);
860 X = NewCodeEntry (OP65_LDA, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
861 InsertEntry (D, X, D->IP++);
864 X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
865 InsertEntry (D, X, D->IP++);
867 /* Lhs load entries can be removed */
868 D->Lhs.X.Flags |= LI_REMOVE;
869 D->Lhs.A.Flags |= LI_REMOVE;
873 /* Save lhs into zeropage and reload later */
877 /* Be sure to setup IP after adding the stores, otherwise it will get
880 D->IP = D->OpIndex+1;
883 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI);
884 InsertEntry (D, X, D->IP++);
887 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
888 InsertEntry (D, X, D->IP++);
891 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
892 InsertEntry (D, X, D->IP++);
896 /* jsr shlaxy/aslaxy/whatever */
897 X = NewCodeEntry (OP65_JSR, AM65_ABS, Name, 0, D->OpEntry->LI);
898 InsertEntry (D, X, D->IP++);
900 /* Remove the push and the call to the shift function */
901 RemoveRemainders (D);
903 /* We changed the sequence */
909 static unsigned Opt___bzero (StackOpData* D)
910 /* Optimize the __bzero sequence. The call is replaced by an inline loop that
 * stores zero bytes via the pointer in D->ZPLo.
 */
916 /* Check if we're using a register variable */
918 /* Store the value into the zeropage instead of pushing it */
923 /* If the return value of __bzero is used, we have to add code to reload
924 * a/x from the pointer variable.
926 if (RegAXUsed (D->Code, D->OpIndex+1)) {
927 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
928 InsertEntry (D, X, D->OpIndex+1);
929 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
930 InsertEntry (D, X, D->OpIndex+2);
933 /* X is always zero, A contains the size of the data area to zero.
934 * Note: A may be zero, in which case the operation is a null op.
936 if (D->OpEntry->RI->In.RegA != 0) {
939 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
940 InsertEntry (D, X, D->OpIndex+1);
942 /* The value of A is known */
943 if (D->OpEntry->RI->In.RegA <= 0x81) {
945 /* Loop using the sign bit */
948 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
949 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
950 InsertEntry (D, X, D->OpIndex+2);
953 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
954 InsertEntry (D, X, D->OpIndex+3);
955 L = CS_GenLabel (D->Code, X);
958 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
959 InsertEntry (D, X, D->OpIndex+4);
962 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
963 InsertEntry (D, X, D->OpIndex+5);
967 /* Loop using an explicit compare */
970 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
971 InsertEntry (D, X, D->OpIndex+2);
974 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
975 InsertEntry (D, X, D->OpIndex+3);
976 L = CS_GenLabel (D->Code, X);
979 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
980 InsertEntry (D, X, D->OpIndex+4);
983 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
984 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
985 InsertEntry (D, X, D->OpIndex+5);
988 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
989 InsertEntry (D, X, D->OpIndex+6);
994 /* Remove the push and the call to the __bzero function */
995 RemoveRemainders (D);
997 /* We changed the sequence */
1003 static unsigned Opt_staspidx (StackOpData* D)
1004 /* Optimize the staspidx sequence. The subroutine call is replaced by a
 * store of A indirect via the zero page pointer D->ZPLo, indexed by Y.
 */
1008 /* Check if we're using a register variable */
1009 if (!IsRegVar (D)) {
1010 /* Store the value into the zeropage instead of pushing it */
1015 /* Replace the store subroutine call by a direct op */
1016 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1017 InsertEntry (D, X, D->OpIndex+1);
1019 /* Remove the push and the call to the staspidx function */
1020 RemoveRemainders (D);
1022 /* We changed the sequence */
1028 static unsigned Opt_staxspidx (StackOpData* D)
1029 /* Optimize the staxspidx sequence. The subroutine call is replaced by two
 * stores indirect via the zero page pointer D->ZPLo: first A, then (with
 * Y incremented by one) the value of X. Finally Y is loaded with zero.
 */
1033 /* Check if we're using a register variable */
1034 if (!IsRegVar (D)) {
1035 /* Store the value into the zeropage instead of pushing it */
1040 /* Inline the store */
1043 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1044 InsertEntry (D, X, D->OpIndex+1);
1046 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
1047 /* Value of Y is known */
1048 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
1049 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
1051 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
1053 InsertEntry (D, X, D->OpIndex+2);
1055 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1056 /* Value of X is known */
1057 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
1058 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
1061 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
1063 InsertEntry (D, X, D->OpIndex+3);
1066 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1067 InsertEntry (D, X, D->OpIndex+4);
1069 /* If we remove staxspidx, we must restore the Y register to what the
1070 * function would return.
1072 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
1073 InsertEntry (D, X, D->OpIndex+5);
1075 /* Remove the push and the call to the staxspidx function */
1076 RemoveRemainders (D);
1078 /* We changed the sequence */
1084 static unsigned Opt_tosaddax (StackOpData* D)
1085 /* Optimize the tosaddax sequence. Two variants are handled: an add that is
 * followed by "ldy #$00" and a call to ldaidx/ldauidx, and the generic
 * case where the add is inlined.
 */
1090 /* We need the entry behind the add */
1091 CHECK (D->NextEntry != 0);
1093 /* Check if the X register is known and zero when the add is done, and
1094 * if the add is followed by
1097 * jsr ldauidx ; or ldaidx
1099 * If this is true, the addition does actually add an offset to a pointer
1100 * before it is dereferenced. Since both subroutines take an offset in Y,
1101 * we can pass the offset (instead of #$00) and remove the addition
1104 if (D->OpEntry->RI->In.RegX == 0 &&
1105 D->NextEntry->OPC == OP65_LDY &&
1106 CE_IsKnownImm (D->NextEntry, 0) &&
1107 !CE_HasLabel (D->NextEntry) &&
1108 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
1109 (CE_IsCallTo (N, "ldauidx") ||
1110 CE_IsCallTo (N, "ldaidx"))) {
1112 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
1114 /* Store the value into the zeropage instead of pushing it */
1118 /* Replace the ldy by a tay. Be sure to create the new entry before
1119 * deleting the ldy, since we will reference the line info from this
1122 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
1123 DelEntry (D, D->OpIndex + 1);
1124 InsertEntry (D, X, D->OpIndex + 1);
1126 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
1127 * the ptr is in the zero page location, we just need to load from
1128 * the pointer, and fix X in case of ldaidx.
1130 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
1131 DelEntry (D, D->OpIndex + 2);
1132 InsertEntry (D, X, D->OpIndex + 2);
1137 /* Add sign extension - N is unused now */
1138 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
1140 L = CS_GenLabel (D->Code, N);
1142 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
1143 InsertEntry (D, X, D->OpIndex + 3);
1145 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
1146 InsertEntry (D, X, D->OpIndex + 4);
1151 /* Store the value into the zeropage instead of pushing it */
1152 ReplacePushByStore (D);
1154 /* Inline the add */
1155 D->IP = D->OpIndex+1;
1158 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
1159 InsertEntry (D, X, D->IP++);
1162 AddOpLow (D, OP65_ADC, &D->Lhs);
1165 if (D->PushEntry->RI->In.RegX == 0) {
1167 /* The high byte is the value in X plus the carry */
1168 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
1171 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1172 InsertEntry (D, X, D->IP++);
1175 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1176 InsertEntry (D, X, D->IP++);
1178 } else if (D->OpEntry->RI->In.RegX == 0 &&
1179 (RegValIsKnown (D->PushEntry->RI->In.RegX) ||
1180 (D->Lhs.X.Flags & LI_RELOAD_Y) == 0)) {
1182 /* The high byte is that of the first operand plus carry */
1184 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
1185 /* Value of first op high byte is known */
1186 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
1187 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1189 /* Value of first op high byte is unknown. Load from ZP or
1192 if (D->Lhs.X.Flags & LI_DIRECT) {
1193 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
1194 X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
1196 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
1199 InsertEntry (D, X, D->IP++);
1202 L = CS_GenLabel (D->Code, D->NextEntry);
1203 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1204 InsertEntry (D, X, D->IP++);
1207 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1208 InsertEntry (D, X, D->IP++);
1210 /* High byte is unknown */
1211 AddOpHigh (D, OP65_ADC, &D->Lhs, 1);
1215 /* Remove the push and the call to the tosaddax function */
1216 RemoveRemainders (D);
1218 /* We changed the sequence */
1224 static unsigned Opt_tosandax (StackOpData* D)
1225 /* Optimize the tosandax sequence by inlining an AND of the low and the high
 * byte with the lhs.
 */
1227 /* Store the value into the zeropage instead of pushing it */
1228 ReplacePushByStore (D);
1230 /* Inline the and, low byte */
1231 D->IP = D->OpIndex + 1;
1232 AddOpLow (D, OP65_AND, &D->Lhs);
1235 AddOpHigh (D, OP65_AND, &D->Lhs, 1);
1237 /* Remove the push and the call to the tosandax function */
1238 RemoveRemainders (D);
1240 /* We changed the sequence */
1246 static unsigned Opt_tosaslax (StackOpData* D)
1247 /* Optimize the tosaslax sequence. Handled by Opt_tosshift using the
 * subroutine "aslaxy".
 */
1249 return Opt_tosshift (D, "aslaxy");
1254 static unsigned Opt_tosasrax (StackOpData* D)
1255 /* Optimize the tosasrax sequence. Handled by Opt_tosshift using the
 * subroutine "asraxy".
 */
1257 return Opt_tosshift (D, "asraxy");
1262 static unsigned Opt_toseqax (StackOpData* D)
1263 /* Optimize the toseqax sequence. Handled by Opt_toseqax_tosneax using the
 * boolean transformer "booleq".
 */
1265 return Opt_toseqax_tosneax (D, "booleq");
1270 static unsigned Opt_tosgeax (StackOpData* D)
1271 /* Optimize the tosgeax sequence by inlining a compare against the rhs */
1276 /* Inline the compare */
1277 D->IP = D->OpIndex+1;
1279 /* Must be true because of OP_RHS_LOAD */
1280 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1282 /* Add code for low operand */
1283 AddOpLow (D, OP65_CMP, &D->Rhs);
1285 /* Add code for high operand */
1286 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1289 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1290 InsertEntry (D, X, D->IP++);
1293 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1294 InsertEntry (D, X, D->IP++);
1295 L = CS_GenLabel (D->Code, X);
1297 /* Insert a bvs L before the eor insn */
1298 X = NewCodeEntry (OP65_BVS, AM65_BRA, L->Name, L, D->OpEntry->LI);
1299 InsertEntry (D, X, D->IP - 2);
1303 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1304 InsertEntry (D, X, D->IP++);
1307 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1308 InsertEntry (D, X, D->IP++);
1311 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1312 InsertEntry (D, X, D->IP++);
1314 /* Remove the push and the call to the tosgeax function */
1315 RemoveRemainders (D);
1317 /* We changed the sequence */
1323 static unsigned Opt_tosltax (StackOpData* D)
1324 /* Optimize the tosltax sequence: The call is replaced by an inline compare
 * that is generated from the load info of the right hand side (D->Rhs),
 * followed by code that turns the resulting flags into a boolean in a/x.
 */
1330 /* Inline the compare. D->IP is positioned behind the operation */
1331 D->IP = D->OpIndex+1;
1333 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1334 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1336 /* Add code for low operand */
1337 AddOpLow (D, OP65_CMP, &D->Rhs);
1339 /* Add code for high operand */
1340 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* eor #$80 */
1343 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1344 InsertEntry (D, X, D->IP++);
/* asl a - this insn also gets the label L used as branch target below */
1347 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1348 InsertEntry (D, X, D->IP++);
1349 L = CS_GenLabel (D->Code, X);
1351 /* Insert a bvc L before the eor insn, so the eor is skipped if V is clear */
1352 X = NewCodeEntry (OP65_BVC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1353 InsertEntry (D, X, D->IP - 2);
/* lda #$00 */
1357 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1358 InsertEntry (D, X, D->IP++);
/* ldx #$00 */
1361 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1362 InsertEntry (D, X, D->IP++);
/* rol a - rotates the carry into bit 0 of the zeroed accumulator */
1365 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1366 InsertEntry (D, X, D->IP++);
1368 /* Remove the push and the call to the tosltax function */
1369 RemoveRemainders (D);
1371 /* We changed the sequence */
1377 static unsigned Opt_tosneax (StackOpData* D)
1378 /* Optimize the tosneax sequence. The work is done by the helper shared with
 * Opt_toseqax, which is called with the name "boolne"; its result is returned
 * unchanged.
 */
1380 return Opt_toseqax_tosneax (D, "boolne");
1385 static unsigned Opt_tosorax (StackOpData* D)
1386 /* Optimize the tosorax sequence: The push is replaced by a store, and the
 * call is replaced by inline "ora" code for the low and the high byte which
 * is generated from the load info of the left hand side (D->Lhs).
 */
1388 /* Store the value into the zeropage instead of pushing it */
1389 ReplacePushByStore (D);
1391 /* Inline the or, low byte. D->IP is positioned behind the operation */
1392 D->IP = D->OpIndex + 1;
1393 AddOpLow (D, OP65_ORA, &D->Lhs);
/* Inline the or, high byte */
1396 AddOpHigh (D, OP65_ORA, &D->Lhs, 1);
1398 /* Remove the push and the call to the tosorax function */
1399 RemoveRemainders (D);
1401 /* We changed the sequence */
1407 static unsigned Opt_tosshlax (StackOpData* D)
1408 /* Optimize the tosshlax sequence. The work is done by Opt_tosshift, which
 * is called with the name "shlaxy"; its result is returned unchanged.
 */
1410 return Opt_tosshift (D, "shlaxy");
1415 static unsigned Opt_tosshrax (StackOpData* D)
1416 /* Optimize the tosshrax sequence. The work is done by Opt_tosshift, which
 * is called with the name "shraxy"; its result is returned unchanged.
 */
1418 return Opt_tosshift (D, "shraxy");
1423 static unsigned Opt_tossubax (StackOpData* D)
1424 /* Optimize the tossubax sequence. Note: subtraction is not commutative!
 * The call is replaced by a sec followed by inline sbc code for the low and
 * the high byte, generated from the load info of the right hand side (D->Rhs).
 */
1429 /* Inline the sbc. D->IP is positioned behind the operation */
1430 D->IP = D->OpIndex+1;
/* sec */
1433 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1434 InsertEntry (D, X, D->IP++);
1436 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1437 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1439 /* Add code for low operand */
1440 AddOpLow (D, OP65_SBC, &D->Rhs);
1442 /* Add code for high operand */
1443 AddOpHigh (D, OP65_SBC, &D->Rhs, 1);
1445 /* Remove the push and the call to the tossubax function */
1446 RemoveRemainders (D);
1448 /* We changed the sequence */
1454 static unsigned Opt_tosugeax (StackOpData* D)
1455 /* Optimize the tosugeax sequence: The call is replaced by an inline compare
 * that is generated from the load info of the right hand side (D->Rhs). The
 * carry left by the compare is then rotated into a zeroed a/x.
 */
1460 /* Inline the compare. D->IP is positioned behind the operation */
1461 D->IP = D->OpIndex+1;
1463 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1464 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1466 /* Add code for low operand */
1467 AddOpLow (D, OP65_CMP, &D->Rhs);
1469 /* Add code for high operand */
1470 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* lda #$00 */
1473 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1474 InsertEntry (D, X, D->IP++);
/* ldx #$00 */
1477 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1478 InsertEntry (D, X, D->IP++);
/* rol a - rotates the carry into bit 0 of the zeroed accumulator */
1481 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1482 InsertEntry (D, X, D->IP++);
1484 /* Remove the push and the call to the tosugeax function */
1485 RemoveRemainders (D);
1487 /* We changed the sequence */
1493 static unsigned Opt_tosugtax (StackOpData* D)
1494 /* Optimize the tosugtax sequence: The call is replaced by an inline 16 bit
 * subtraction generated from the load info of the right hand side (D->Rhs).
 * The low byte result is saved in tmp1 and or'ed to the high byte result
 * before "boolugt" is called to create the boolean.
 */
1499 /* Inline the sbc. D->IP is positioned behind the operation */
1500 D->IP = D->OpIndex+1;
1502 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1503 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
/* sec */
1506 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1507 InsertEntry (D, X, D->IP++);
1509 /* Add code for low operand */
1510 AddOpLow (D, OP65_SBC, &D->Rhs);
1512 /* We need the zero flag, so remember the intermediate result */
1513 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1514 InsertEntry (D, X, D->IP++);
1516 /* Add code for high operand */
1517 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* ora tmp1 - combine high and low byte result */
1520 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1521 InsertEntry (D, X, D->IP++);
1523 /* Transform to boolean */
1524 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolugt", 0, D->OpEntry->LI);
1525 InsertEntry (D, X, D->IP++);
1527 /* Remove the push and the call to the operator function */
1528 RemoveRemainders (D);
1530 /* We changed the sequence */
1536 static unsigned Opt_tosuleax (StackOpData* D)
1537 /* Optimize the tosuleax sequence: The call is replaced by an inline 16 bit
 * subtraction generated from the load info of the right hand side (D->Rhs).
 * The low byte result is saved in tmp1 and or'ed to the high byte result
 * before "boolule" is called to create the boolean.
 */
1542 /* Inline the sbc. D->IP is positioned behind the operation */
1543 D->IP = D->OpIndex+1;
1545 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1546 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
/* sec */
1549 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1550 InsertEntry (D, X, D->IP++);
1552 /* Add code for low operand */
1553 AddOpLow (D, OP65_SBC, &D->Rhs);
1555 /* We need the zero flag, so remember the intermediate result */
1556 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1557 InsertEntry (D, X, D->IP++);
1559 /* Add code for high operand */
1560 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* ora tmp1 - combine high and low byte result */
1563 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1564 InsertEntry (D, X, D->IP++);
1566 /* Transform to boolean */
1567 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolule", 0, D->OpEntry->LI);
1568 InsertEntry (D, X, D->IP++);
1570 /* Remove the push and the call to the operator function */
1571 RemoveRemainders (D);
1573 /* We changed the sequence */
1579 static unsigned Opt_tosultax (StackOpData* D)
1580 /* Optimize the tosultax sequence: The call is replaced by an inline compare
 * that is generated from the load info of the right hand side (D->Rhs),
 * followed by a call to "boolult" which creates the boolean.
 */
1585 /* Inline the compare. D->IP is positioned behind the operation */
1586 D->IP = D->OpIndex+1;
1588 /* Must be true because of OP_RHS_LOAD_DIRECT (verified by PreCondOk) */
1589 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1591 /* Add code for low operand */
1592 AddOpLow (D, OP65_CMP, &D->Rhs);
1594 /* Add code for high operand */
1595 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1597 /* Transform to boolean */
1598 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolult", 0, D->OpEntry->LI);
1599 InsertEntry (D, X, D->IP++);
1601 /* Remove the push and the call to the operator function */
1602 RemoveRemainders (D);
1604 /* We changed the sequence */
1610 static unsigned Opt_tosxorax (StackOpData* D)
1611 /* Optimize the tosxorax sequence: The push is replaced by a store, and the
 * call is replaced by inline "eor" code. The high byte is precalculated if
 * the X values at the push and at the operation are both known.
 */
1616 /* Store the value into the zeropage instead of pushing it */
1617 ReplacePushByStore (D);
1619 /* Inline the xor, low byte. D->IP is positioned behind the operation */
1620 D->IP = D->OpIndex + 1;
1621 AddOpLow (D, OP65_EOR, &D->Lhs);
/* High byte */
1624 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
1625 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1626 /* Both values known, precalculate the result and load it into X */
1627 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
1628 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1629 InsertEntry (D, X, D->IP++);
1630 } else if (D->PushEntry->RI->In.RegX != 0) {
1631 /* The pushed high byte is not zero, so the eor must be done at runtime.
 * If it is zero, no code is generated since the eor would not change X.
 */
1632 AddOpHigh (D, OP65_EOR, &D->Lhs, 1);
1635 /* Remove the push and the call to the tosxorax function */
1636 RemoveRemainders (D);
1638 /* We changed the sequence */
1644 /*****************************************************************************/
1646 /*****************************************************************************/
/* Table of the subroutines handled by this module. Each entry consists of the
 * subroutine name, the optimizer function, the UnusedRegs mask and the flags
 * that are evaluated by PreCondOk. FindFunc searches this table with bsearch
 * and a strcmp based compare, so the entries must stay sorted by name.
 */
1650 static const OptFuncDesc FuncTable[] = {
1651 { "__bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN },
1652 { "staspidx", Opt_staspidx, REG_NONE, OP_NONE },
1653 { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE },
1654 { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE },
1655 { "tosandax", Opt_tosandax, REG_NONE, OP_NONE },
1656 { "tosaslax", Opt_tosaslax, REG_NONE, OP_NONE },
1657 { "tosasrax", Opt_tosasrax, REG_NONE, OP_NONE },
1658 { "toseqax", Opt_toseqax, REG_NONE, OP_NONE },
1659 { "tosgeax", Opt_tosgeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1660 { "tosltax", Opt_tosltax, REG_NONE, OP_RHS_LOAD_DIRECT },
1661 { "tosneax", Opt_tosneax, REG_NONE, OP_NONE },
1662 { "tosorax", Opt_tosorax, REG_NONE, OP_NONE },
1663 { "tosshlax", Opt_tosshlax, REG_NONE, OP_NONE },
1664 { "tosshrax", Opt_tosshrax, REG_NONE, OP_NONE },
1665 { "tossubax", Opt_tossubax, REG_NONE, OP_RHS_LOAD_DIRECT },
1666 { "tosugeax", Opt_tosugeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1667 { "tosugtax", Opt_tosugtax, REG_NONE, OP_RHS_LOAD_DIRECT },
1668 { "tosuleax", Opt_tosuleax, REG_NONE, OP_RHS_LOAD_DIRECT },
1669 { "tosultax", Opt_tosultax, REG_NONE, OP_RHS_LOAD_DIRECT },
1670 { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE },
/* Number of entries in FuncTable */
1672 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
1676 static int CmpFunc (const void* Key, const void* Func)
1677 /* Compare function for bsearch: Key is the name that is searched for, Func
 * points to an OptFuncDesc entry. The result is that of strcmp applied to the
 * key and the name of the entry.
 */
1679 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
1684 static const OptFuncDesc* FindFunc (const char* Name)
/* The lookup is a bsearch over FuncTable using CmpFunc (FUNC_COUNT entries) */
1685 /* Find the function with the given name. Return a pointer to the table entry
1686 * or NULL if the function was not found.
1689 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
1694 static int CmpHarmless (const void* Key, const void* Entry)
1695 /* Compare function for bsearch: Key is the name that is searched for, Entry
 * points to a table element of type const char*. The result is that of strcmp
 * applied to the key and the string the element refers to.
 */
1697 return strcmp (Key, *(const char**)Entry);
1702 static int HarmlessCall (const char* Name)
/* The name is looked up with bsearch, so the local table Tab must be sorted */
1703 /* Check if this is a call to a harmless subroutine that will not interrupt
1704 * the pushax/op sequence when encountered.
1707 static const char* Tab[] = {
1753 void* R = bsearch (Name,
1755 sizeof (Tab) / sizeof (Tab[0]),
1763 static void ResetStackOpData (StackOpData* Data)
1764 /* Reset the given data structure: No registers are marked as used, the load
 * info for both operands is cleared and the push index is made invalid.
 */
/* No registers used so far */
1767 Data->UsedRegs = REG_NONE;
/* Forget the tracked loads for the left and right hand side */
1769 ClearLoadInfo (&Data->Lhs);
1770 ClearLoadInfo (&Data->Rhs);
/* -1 means that no push has been found */
1772 Data->PushIndex = -1;
1778 static int PreCondOk (StackOpData* D)
1779 /* Check if the preconditions for a call to the optimizer subfunction are
1780 * satisfied. As a side effect, this function will also choose the zero page
1784 /* Check the flags */
1785 unsigned UnusedRegs = D->OptFunc->UnusedRegs;
1786 if (UnusedRegs != REG_NONE &&
1787 (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) {
1788 /* Cannot optimize: GetRegInfo reports one of the registers that must be
 * unused for the code following the operation.
 */
1791 if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 &&
1792 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
1793 /* Cannot optimize: the value of A at the operation is unknown */
1796 if ((D->OptFunc->Flags & OP_X_ZERO) != 0 &&
1797 D->OpEntry->RI->In.RegX != 0) {
1798 /* Cannot optimize: the tracked X value at the operation is not zero */
1801 if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0) {
1802 if (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0) {
1803 /* Cannot optimize: no valid load insn for A or X of the lhs */
1805 } else if ((D->OptFunc->Flags & OP_LHS_LOAD_DIRECT) != 0) {
1806 if ((D->Lhs.A.Flags & D->Lhs.X.Flags & LI_DIRECT) == 0) {
1807 /* Cannot optimize: the lhs loads are not both flagged LI_DIRECT */
1812 if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0) {
1813 if (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0) {
1814 /* Cannot optimize: no valid load insn for A or X of the rhs */
1816 } else if ((D->OptFunc->Flags & OP_RHS_LOAD_DIRECT) != 0) {
1817 if ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) == 0) {
1818 /* Cannot optimize: the rhs loads are not both flagged LI_DIRECT */
1823 if ((D->Rhs.A.Flags | D->Rhs.X.Flags) & LI_DUP_LOAD) {
1824 /* Cannot optimize: one of the rhs loads is flagged as duplicate load */
1828 /* Determine the zero page locations to use. The candidates are checked in
 * the order ptr1, sreg, ptr2, and the first one that is not contained in
 * D->UsedRegs is taken.
 */
1829 if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
1832 } else if ((D->UsedRegs & REG_SREG) == REG_NONE) {
1835 } else if ((D->UsedRegs & REG_PTR2) == REG_NONE) {
1839 /* No registers available */
1843 /* Determine if we have a basic block. The result of this check is the
 * result of the function.
 */
1844 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
1849 /*****************************************************************************/
1851 /*****************************************************************************/
1855 unsigned OptStackOps (CodeSeg* S)
1856 /* Optimize operations that take operands via the stack. The code segment S
 * is scanned for a call to pushax that is followed by a call to one of the
 * functions in FuncTable. The return value is the number of changes made.
 */
1858 unsigned Changes = 0; /* Number of changes in one run */
1861 int OldEntryCount; /* Old number of entries */
1862 unsigned UsedRegs = 0; /* Registers used */
1863 unsigned ChangedRegs = 0;/* Registers changed */
1871 } State = Initialize;
1874 /* Remember the code segment in the info struct */
1877 /* Look for a call to pushax followed by a call to some other function
1878 * that takes its first argument on the stack, and the second argument
1879 * in the primary register.
1880 * It depends on the code between the two if we can handle/transform the
1881 * sequence, so check this code for the following list of things:
1883 * - the range must be a basic block (one entry, one exit)
1884 * - there may not be accesses to local variables with unknown
1885 * offsets (because we have to adjust these offsets).
1886 * - no subroutine calls
1889 * Since we need a zero page register later, do also check the
1890 * intermediate code for zero page use.
1893 while (I < (int)CS_GetEntryCount (S)) {
1895 /* Get the next entry */
1896 CodeEntry* E = CS_GetEntry (S, I);
1898 /* Actions depend on state */
1902 ResetStackOpData (&Data);
1903 UsedRegs = ChangedRegs = REG_NONE;
1908 /* While searching, track register load insns, so we can tell
1909 * what is in a register once pushax is encountered.
1911 if (CE_HasLabel (E)) {
1912 /* Currently we don't track across branches */
1913 ClearLoadInfo (&Data.Lhs);
1915 if (CE_IsCallTo (E, "pushax")) {
1919 /* Track load insns */
1920 TrackLoads (&Data.Lhs, E, I);
1925 /* We found a pushax before. Search for a stack op that may
1926 * follow and in the meantime, track zeropage usage and check
1927 * for code that will disable us from translating the sequence.
1929 if (CE_HasLabel (E)) {
1930 /* Currently we don't track across branches */
1931 ClearLoadInfo (&Data.Rhs);
1933 if (E->OPC == OP65_JSR) {
1935 /* Subroutine call: Check if this is one of the functions,
1936 * we're going to replace.
1938 Data.OptFunc = FindFunc (E->Arg);
1940 /* Remember the op index and go on */
1945 } else if (!HarmlessCall (E->Arg)) {
1946 /* A call to an unknown subroutine: We need to start
1947 * over after the last pushax. Note: This will also
1948 * happen if we encounter a call to pushax!
1954 /* Track register usage */
1955 Data.UsedRegs |= (E->Use | E->Chg);
1956 TrackLoads (&Data.Rhs, E, I);
1959 } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) {
1961 /* Too dangerous - there may be a change of a variable
1962 * within the sequence.
1968 } else if ((E->Use & REG_SP) != 0 &&
1969 (E->AM != AM65_ZP_INDY ||
1970 RegValIsUnknown (E->RI->In.RegY) ||
1971 E->RI->In.RegY < 2)) {
1973 /* If we are using the stack, and we don't have "indirect Y"
1974 * addressing mode, or the value of Y is unknown, or less
1975 * than two, we cannot cope with this piece of code. Having
1976 * an unknown value of Y means that we cannot correct the
1977 * stack offset, while having an offset less than two means
1978 * that the code works with the value on stack which is to
1986 /* Other stuff: Track register usage */
1987 Data.UsedRegs |= (E->Use | E->Chg);
1988 TrackLoads (&Data.Rhs, E, I);
1990 /* If the registers from the push (A/X) are used before they're
1991 * changed, we cannot change the sequence, because this would
1992 * with a high probability change the register contents.
1995 if ((UsedRegs & ~ChangedRegs) & REG_AX) {
2000 ChangedRegs |= E->Chg;
2004 /* Track zero page location usage beyond this point */
2005 Data.UsedRegs |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
2007 /* Finalize the load info */
2008 FinalizeLoadInfo (&Data.Lhs, S);
2009 FinalizeLoadInfo (&Data.Rhs, S);
2011 /* If the Lhs loads do load from zeropage, we have to include
2012 * them into the set of used registers. The Rhs loads have already
2015 if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) {
2016 Data.UsedRegs |= Data.Lhs.A.LoadEntry->Use;
2018 if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) {
2019 Data.UsedRegs |= Data.Lhs.X.LoadEntry->Use;
2022 /* Check the preconditions. If they aren't ok, reset the insn
2023 * pointer to the pushax and start over. We will lose part of
2024 * load tracking but at least a/x has probably been lost between
2025 * pushax and here and will be tracked again when restarting.
2027 if (!PreCondOk (&Data)) {
2033 /* Prepare the remainder of the data structure. */
2034 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
2035 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
2036 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
2037 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
2039 /* Remember the current number of code lines */
2040 OldEntryCount = CS_GetEntryCount (S);
2042 /* Adjust stack offsets to account for the upcoming removal */
2043 AdjustStackOffset (&Data, 2);
2045 /* Regenerate register info, since AdjustStackOffset changed
2050 /* Call the optimizer function */
2051 Changes += Data.OptFunc->Func (&Data);
2053 /* Since the function may have added or deleted entries,
2054 * correct the index.
2056 I += CS_GetEntryCount (S) - OldEntryCount;
2058 /* Regenerate register info */
2072 /* Return the number of changes made */