1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2009 Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
48 /*****************************************************************************/
49 /* Load tracking data */
50 /*****************************************************************************/
54 /* LoadRegInfo flags set by DirectOp */
56 #define LI_DIRECT 0x01 /* Direct op may be used */
57 #define LI_RELOAD_Y 0x02 /* Reload index register Y */
58 #define LI_REMOVE 0x04 /* Load may be removed */
60 /* Structure that tells us how to load the lhs values */
61 typedef struct LoadRegInfo LoadRegInfo;
63 int LoadIndex; /* Index of load insn, -1 if invalid */
64 CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */
65 unsigned char Flags; /* Tells us how to load */
66 unsigned char Offs; /* Stack offset if data is on stack */
69 /* Now combined for both registers */
70 typedef struct LoadInfo LoadInfo;
72 LoadRegInfo A; /* Info for A register */
73 LoadRegInfo X; /* Info for X register */
74 LoadRegInfo Y; /* Info for Y register */
79 /*****************************************************************************/
81 /*****************************************************************************/
85 /* Flags for the functions */
87 OP_NONE = 0x00, /* Nothing special */
88 OP_A_KNOWN = 0x01, /* Value of A must be known */
89 OP_X_ZERO = 0x02, /* X must be zero */
90 OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */
91 OP_RHS_LOAD = 0x08, /* Must have load insns for RHS */
94 /* Structure forward decl */
95 typedef struct StackOpData StackOpData;
97 /* Structure that describes an optimizer subfunction for a specific op */
98 typedef unsigned (*OptFunc) (StackOpData* D);
99 typedef struct OptFuncDesc OptFuncDesc;
101 const char* Name; /* Name of the replaced runtime function */
102 OptFunc Func; /* Function pointer */
103 unsigned UnusedRegs; /* Regs that must not be used later */
104 OP_FLAGS Flags; /* Flags */
107 /* Structure that holds the needed data */
109 CodeSeg* Code; /* Pointer to code segment */
110 unsigned Flags; /* Flags to remember things */
112 /* Pointer to optimizer subfunction description */
113 const OptFuncDesc* OptFunc;
115 /* ZP register usage inside the sequence */
118 /* Register load information for lhs and rhs */
122 /* Several indices of insns in the code segment */
123 int PushIndex; /* Index of call to pushax in codeseg */
124 int OpIndex; /* Index of actual operation */
126 /* Pointers to insns in the code segment */
127 CodeEntry* PrevEntry; /* Entry before the call to pushax */
128 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
129 CodeEntry* OpEntry; /* Pointer to entry with op */
130 CodeEntry* NextEntry; /* Entry after the op */
132 const char* ZPLo; /* Lo byte of zero page loc to use */
133 const char* ZPHi; /* Hi byte of zero page loc to use */
134 unsigned IP; /* Insertion point used by some routines */
139 /*****************************************************************************/
140 /* Load tracking code */
141 /*****************************************************************************/
145 static void ClearLoadRegInfo (LoadRegInfo* RI)
146 /* Clear a LoadRegInfo struct */
153 static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S)
154 /* Prepare a LoadRegInfo struct for use */
157 if (RI->LoadIndex >= 0) {
158 RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex);
167 static void ClearLoadInfo (LoadInfo* LI)
168 /* Clear a LoadInfo struct */
170 ClearLoadRegInfo (&LI->A);
171 ClearLoadRegInfo (&LI->X);
172 ClearLoadRegInfo (&LI->Y);
177 static void AdjustLoadRegInfo (LoadRegInfo* RI, int DelIndex, int Change)
178 /* Adjust a load register info struct after deleting or inserting an entry
182 CHECK (abs (Change) == 1);
185 if (DelIndex < RI->LoadIndex) {
187 } else if (DelIndex == RI->LoadIndex) {
188 /* Has been removed */
194 if (DelIndex <= RI->LoadIndex) {
202 static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S)
203 /* Prepare a LoadInfo struct for use */
205 /* Get the entries */
206 FinalizeLoadRegInfo (&LI->A, S);
207 FinalizeLoadRegInfo (&LI->X, S);
208 FinalizeLoadRegInfo (&LI->Y, S);
213 static void AdjustLoadInfo (LoadInfo* LI, int DelIndex, int Change)
214 /* Adjust a load info struct after deleting entry with a given index */
216 AdjustLoadRegInfo (&LI->A, DelIndex, Change);
217 AdjustLoadRegInfo (&LI->X, DelIndex, Change);
218 AdjustLoadRegInfo (&LI->Y, DelIndex, Change);
223 static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I)
224 /* Track loads for a code entry */
226 if (E->Info & OF_LOAD) {
227 if (E->Chg & REG_A) {
230 if (E->Chg & REG_X) {
233 if (E->Chg & REG_Y) {
236 } else if (E->Info & OF_XFR) {
238 case OP65_TAX: LI->X.LoadIndex = LI->A.LoadIndex; break;
239 case OP65_TAY: LI->Y.LoadIndex = LI->A.LoadIndex; break;
240 case OP65_TXA: LI->A.LoadIndex = LI->X.LoadIndex; break;
241 case OP65_TYA: LI->A.LoadIndex = LI->Y.LoadIndex; break;
244 } else if (CE_IsCallTo (E, "ldaxysp")) {
245 /* Both registers set, Y changed */
248 LI->Y.LoadIndex = -1;
250 if (E->Chg & REG_A) {
251 LI->A.LoadIndex = -1;
253 if (E->Chg & REG_X) {
254 LI->X.LoadIndex = -1;
256 if (E->Chg & REG_Y) {
257 LI->Y.LoadIndex = -1;
264 /*****************************************************************************/
266 /*****************************************************************************/
270 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
271 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
272 * OpIndex is adjusted according to the insertions.
275 /* Walk over all entries */
276 int I = D->PushIndex + 1;
277 while (I < D->OpIndex) {
279 CodeEntry* E = CS_GetEntry (D->Code, I);
281 int NeedCorrection = 0;
282 if ((E->Use & REG_SP) != 0) {
284 /* Check for some things that should not happen */
285 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
286 CHECK (strcmp (E->Arg, "sp") == 0);
288 /* We need to correct this one */
291 } else if (CE_IsCallTo (E, "ldaxysp")) {
293 /* We need to correct this one */
298 if (NeedCorrection) {
300 /* Get the code entry before this one. If it's a LDY, adjust the
303 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
304 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
306 /* The Y load is just before the stack access, adjust it */
307 CE_SetNumArg (P, P->Num - Offs);
311 /* Insert a new load instruction before the stack access */
312 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
313 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
314 CS_InsertEntry (D->Code, X, I++);
316 /* One more inserted entries */
321 /* If we need the value of Y later, be sure to reload it */
322 if (RegYUsed (D->Code, I+1)) {
323 const char* Arg = MakeHexArg (E->RI->In.RegY);
324 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
325 CS_InsertEntry (D->Code, X, I+1);
327 /* One more inserted entries */
330 /* Skip this instruction in the next round */
342 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
343 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
344 * be adjusted by this function.
347 /* Insert the entry into the code segment */
348 CS_InsertEntry (D->Code, E, Index);
350 /* Adjust register loads if necessary */
351 AdjustLoadInfo (&D->Lhs, Index, 1);
352 AdjustLoadInfo (&D->Rhs, Index, 1);
354 /* Adjust the indices if necessary */
355 if (D->PushEntry && Index <= D->PushIndex) {
358 if (D->OpEntry && Index <= D->OpIndex) {
365 static void DelEntry (StackOpData* D, int Index)
366 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
367 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
370 /* Delete the entry from the code segment */
371 CS_DelEntry (D->Code, Index);
373 /* Adjust register loads if necessary */
374 AdjustLoadInfo (&D->Lhs, Index, -1);
375 AdjustLoadInfo (&D->Rhs, Index, -1);
377 /* Adjust the other indices if necessary */
378 if (Index < D->PushIndex) {
380 } else if (Index == D->PushIndex) {
383 if (Index < D->OpIndex) {
385 } else if (Index == D->OpIndex) {
392 static void CheckOneDirectOp (LoadRegInfo* LI, unsigned char Offs)
393 /* Check if the given entry is a lda instruction with an addressing mode
394 * that allows us to replace it by another operation (like ora). If so, we may
395 * use this location for the or and must not save the value in the zero
399 /* Get the load entry */
400 CodeEntry* E = LI->LoadEntry;
406 /* Check the load entry */
408 /* Must check the call first since addressing mode is ABS, so second
409 * "if" will catch otherwise.
411 if (CE_IsCallTo (E, "ldaxysp")) {
412 /* Same as single loads from stack. Since we must distinguish
413 * between A and X here, the necessary offset is passed to the
414 * function as a parameter.
416 LI->Offs = (unsigned char) E->RI->In.RegY - Offs;
417 LI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
418 } else if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
419 /* These insns are all ok and replaceable */
420 LI->Flags |= LI_DIRECT;
421 } else if (E->AM == AM65_ZP_INDY &&
422 RegValIsKnown (E->RI->In.RegY) &&
423 strcmp (E->Arg, "sp") == 0) {
424 /* A load from the stack with known offset is also ok, but in this
425 * case we must reload the index register later. Please note that
426 * a load indirect via other zero page locations is not ok, since
427 * these locations may change between the push and the actual
430 LI->Offs = (unsigned char) E->RI->In.RegY;
431 LI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
438 static void CheckDirectOp (StackOpData* D)
439 /* Check if the given entry is a lda instruction with an addressing mode
440 * that allows us to replace it by another operation (like ora). If so, we may
441 * use this location for the or and must not save the value in the zero
445 /* Check flags for all load instructions */
446 CheckOneDirectOp (&D->Lhs.A, 1);
447 CheckOneDirectOp (&D->Lhs.X, 0);
448 CheckOneDirectOp (&D->Rhs.A, 1);
449 CheckOneDirectOp (&D->Rhs.X, 0);
454 static void ReplacePushByStore (StackOpData* D)
455 /* Replace the call to the push subroutine by a store into the zero page
456 * location (actually, the push is not replaced, because we need it for
457 * later, but the name is still ok since the push will get removed at the
458 * end of each routine).
463 /* Store the value into the zeropage instead of pushing it. Check high
464 * byte first so that the store is later in A/X order.
466 if ((D->Lhs.X.Flags & LI_DIRECT) == 0) {
467 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
468 InsertEntry (D, X, D->PushIndex+1);
470 if ((D->Lhs.A.Flags & LI_DIRECT) == 0) {
471 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
472 InsertEntry (D, X, D->PushIndex+1);
478 static void AddOpLow (StackOpData* D, opc_t OPC)
479 /* Add an op for the low byte of an operator. This function honours the
480 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
481 * All code is inserted at the current insertion point.
486 if ((D->Lhs.A.Flags & LI_DIRECT) != 0) {
487 /* Op with a variable location. If the location is on the stack, we
488 * need to reload the Y register.
490 if ((D->Lhs.A.Flags & LI_RELOAD_Y) == 0) {
493 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
494 X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
495 InsertEntry (D, X, D->IP++);
500 const char* Arg = MakeHexArg (D->Lhs.A.Offs);
501 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
502 InsertEntry (D, X, D->IP++);
505 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
506 InsertEntry (D, X, D->IP++);
510 /* In both cases, we can remove the load */
511 D->Lhs.A.Flags |= LI_REMOVE;
515 /* Op with temp storage */
516 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
517 InsertEntry (D, X, D->IP++);
524 static void AddOpHigh (StackOpData* D, opc_t OPC)
525 /* Add an op for the high byte of an operator. Special cases (constant values
526 * or similar) have to be checked separately, the function covers only the
527 * generic case. Code is inserted at the insertion point.
533 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
534 InsertEntry (D, X, D->IP++);
537 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
538 InsertEntry (D, X, D->IP++);
540 if ((D->Lhs.X.Flags & LI_DIRECT) != 0) {
542 if ((D->Lhs.X.Flags & LI_RELOAD_Y) == 0) {
545 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
546 X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
547 InsertEntry (D, X, D->IP++);
552 const char* Arg = MakeHexArg (D->Lhs.X.Offs);
553 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
554 InsertEntry (D, X, D->IP++);
557 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
558 InsertEntry (D, X, D->IP++);
561 /* In both cases, we can remove the load */
562 D->Lhs.X.Flags |= LI_REMOVE;
566 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
567 InsertEntry (D, X, D->IP++);
571 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
572 InsertEntry (D, X, D->IP++);
575 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
576 InsertEntry (D, X, D->IP++);
581 static void RemoveRegLoads (StackOpData* D, LoadInfo* LI)
582 /* Remove register load insns */
584 /* Both registers may be loaded with one insn, but DelEntry will in this
585 * case clear the other one.
587 if (LI->A.LoadIndex >= 0 && (LI->A.Flags & LI_REMOVE)) {
588 DelEntry (D, LI->A.LoadIndex);
590 if (LI->X.LoadIndex >= 0 && (LI->X.Flags & LI_REMOVE)) {
591 DelEntry (D, LI->X.LoadIndex);
597 static void RemoveRemainders (StackOpData* D)
598 /* Remove the code that is unnecessary after translation of the sequence */
600 /* Remove the register loads for lhs and rhs */
601 RemoveRegLoads (D, &D->Lhs);
602 RemoveRegLoads (D, &D->Rhs);
604 /* Remove the push and the operator routine */
605 DelEntry (D, D->OpIndex);
606 DelEntry (D, D->PushIndex);
611 static int IsRegVar (StackOpData* D)
612 /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi
613 * in the given StackOpData struct by the variable and return true. Otherwise
614 * leave D untouched and return false.
617 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
618 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
621 /* Must have both load insns */
622 if (LoadA == 0 || LoadX == 0) {
626 /* Must be loads from zp */
627 if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) {
631 /* Must be the same zp loc with high byte in X */
632 Len = strlen (LoadA->Arg);
633 if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 ||
634 strcmp (LoadX->Arg + Len, "+1") != 0) {
638 /* Use the zero page location directly */
639 D->ZPLo = LoadA->Arg;
640 D->ZPHi = LoadX->Arg;
646 /*****************************************************************************/
647 /* Actual optimization functions */
648 /*****************************************************************************/
652 static unsigned Opt___bzero (StackOpData* D)
653 /* Optimize the __bzero sequence if possible */
659 /* Check if we're using a register variable */
661 /* Store the value into the zeropage instead of pushing it */
662 ReplacePushByStore (D);
665 /* If the return value of __bzero is used, we have to add code to reload
666 * a/x from the pointer variable.
668 if (RegAXUsed (D->Code, D->OpIndex+1)) {
669 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
670 InsertEntry (D, X, D->OpIndex+1);
671 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
672 InsertEntry (D, X, D->OpIndex+2);
675 /* X is always zero, A contains the size of the data area to zero.
676 * Note: A may be zero, in which case the operation is null op.
678 if (D->OpEntry->RI->In.RegA != 0) {
681 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
682 InsertEntry (D, X, D->OpIndex+1);
684 /* The value of A is known */
685 if (D->OpEntry->RI->In.RegA <= 0x81) {
687 /* Loop using the sign bit */
690 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
691 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
692 InsertEntry (D, X, D->OpIndex+2);
695 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
696 InsertEntry (D, X, D->OpIndex+3);
697 L = CS_GenLabel (D->Code, X);
700 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
701 InsertEntry (D, X, D->OpIndex+4);
704 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
705 InsertEntry (D, X, D->OpIndex+5);
709 /* Loop using an explicit compare */
712 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
713 InsertEntry (D, X, D->OpIndex+2);
716 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
717 InsertEntry (D, X, D->OpIndex+3);
718 L = CS_GenLabel (D->Code, X);
721 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
722 InsertEntry (D, X, D->OpIndex+4);
725 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
726 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
727 InsertEntry (D, X, D->OpIndex+5);
730 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
731 InsertEntry (D, X, D->OpIndex+6);
736 /* Remove the push and the call to the __bzero function */
737 RemoveRemainders (D);
739 /* We changed the sequence */
745 static unsigned Opt_staspidx (StackOpData* D)
746 /* Optimize the staspidx sequence if possible */
750 /* Check if we're using a register variable */
752 /* Store the value into the zeropage instead of pushing it */
753 ReplacePushByStore (D);
756 /* Replace the store subroutine call by a direct op */
757 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
758 InsertEntry (D, X, D->OpIndex+1);
760 /* Remove the push and the call to the staspidx function */
761 RemoveRemainders (D);
763 /* We changed the sequence */
769 static unsigned Opt_staxspidx (StackOpData* D)
770 /* Optimize the staxspidx sequence if possible */
774 /* Check if we're using a register variable */
776 /* Store the value into the zeropage instead of pushing it */
777 ReplacePushByStore (D);
780 /* Inline the store */
783 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
784 InsertEntry (D, X, D->OpIndex+1);
786 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
787 /* Value of Y is known */
788 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
789 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
791 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
793 InsertEntry (D, X, D->OpIndex+2);
795 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
796 /* Value of X is known */
797 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
798 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
801 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
803 InsertEntry (D, X, D->OpIndex+3);
806 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
807 InsertEntry (D, X, D->OpIndex+4);
809 /* If we remove staxspidx, we must restore the Y register to what the
810 * function would return.
812 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
813 InsertEntry (D, X, D->OpIndex+5);
815 /* Remove the push and the call to the staxspidx function */
816 RemoveRemainders (D);
818 /* We changed the sequence */
824 static unsigned Opt_tosaddax (StackOpData* D)
825 /* Optimize the tosaddax sequence if possible */
830 /* We need the entry behind the add */
831 CHECK (D->NextEntry != 0);
833 /* Check if the X register is known and zero when the add is done, and
834 * if the add is followed by
837 * jsr ldauidx ; or ldaidx
839 * If this is true, the addition does actually add an offset to a pointer
840 * before it is dereferenced. Since both subroutines take an offset in Y,
841 * we can pass the offset (instead of #$00) and remove the addition
844 if (D->OpEntry->RI->In.RegX == 0 &&
845 D->NextEntry->OPC == OP65_LDY &&
846 CE_IsKnownImm (D->NextEntry, 0) &&
847 !CE_HasLabel (D->NextEntry) &&
848 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
849 (CE_IsCallTo (N, "ldauidx") ||
850 CE_IsCallTo (N, "ldaidx"))) {
852 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
854 /* Store the value into the zeropage instead of pushing it */
855 ReplacePushByStore (D);
857 /* Replace the ldy by a tay. Be sure to create the new entry before
858 * deleting the ldy, since we will reference the line info from this
861 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
862 DelEntry (D, D->OpIndex + 1);
863 InsertEntry (D, X, D->OpIndex + 1);
865 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
866 * the ptr is in the zero page location, we just need to load from
867 * the pointer, and fix X in case of ldaidx.
869 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
870 DelEntry (D, D->OpIndex + 2);
871 InsertEntry (D, X, D->OpIndex + 2);
876 /* Add sign extension - N is unused now */
877 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
879 L = CS_GenLabel (D->Code, N);
881 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
882 InsertEntry (D, X, D->OpIndex + 3);
884 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
885 InsertEntry (D, X, D->OpIndex + 4);
890 /* Check the entry before the push. If it's a lda instruction with an
891 * addressing mode that allows us to replace it, we may use this
892 * location for the op and must not save the value in the zero page
897 /* Store the value into the zeropage instead of pushing it */
898 ReplacePushByStore (D);
901 D->IP = D->OpIndex+1;
904 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
905 InsertEntry (D, X, D->IP++);
908 AddOpLow (D, OP65_ADC);
911 if (D->PushEntry->RI->In.RegX == 0) {
912 /* The high byte is the value in X plus the carry */
913 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
914 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
915 InsertEntry (D, X, D->IP++);
916 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
917 InsertEntry (D, X, D->IP++);
918 } else if (D->OpEntry->RI->In.RegX == 0) {
919 /* The high byte is that of the first operand plus carry */
921 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
922 /* Value of first op high byte is known */
923 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
924 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
926 /* Value of first op high byte is unknown */
927 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
929 InsertEntry (D, X, D->IP++);
932 L = CS_GenLabel (D->Code, D->NextEntry);
933 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
934 InsertEntry (D, X, D->IP++);
937 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
938 InsertEntry (D, X, D->IP++);
940 /* High byte is unknown */
941 AddOpHigh (D, OP65_ADC);
945 /* Remove the push and the call to the tosaddax function */
946 RemoveRemainders (D);
948 /* We changed the sequence */
954 static unsigned Opt_tosandax (StackOpData* D)
955 /* Optimize the tosandax sequence if possible */
959 /* Check the entry before the push. If it's a lda instruction with an
960 * addressing mode that allows us to replace it, we may use this
961 * location for the op and must not save the value in the zero page
966 /* Store the value into the zeropage instead of pushing it */
967 ReplacePushByStore (D);
969 /* Inline the and, low byte */
970 D->IP = D->OpIndex + 1;
971 AddOpLow (D, OP65_AND);
974 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
975 /* The high byte is zero */
976 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
977 InsertEntry (D, X, D->IP++);
979 /* High byte is unknown */
980 AddOpHigh (D, OP65_AND);
983 /* Remove the push and the call to the tosandax function */
984 RemoveRemainders (D);
986 /* We changed the sequence */
992 static unsigned Opt_tosorax (StackOpData* D)
993 /* Optimize the tosorax sequence if possible */
997 /* Check the entry before the push. If it's a lda instruction with an
998 * addressing mode that allows us to replace it, we may use this
999 * location for the op and must not save the value in the zero page
1004 /* Store the value into the zeropage instead of pushing it */
1005 ReplacePushByStore (D);
1007 /* Inline the or, low byte */
1008 D->IP = D->OpIndex + 1;
1009 AddOpLow (D, OP65_ORA);
1012 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
1013 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1014 /* Both values known, precalculate the result */
1015 unsigned char Result = D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX;
1016 const char* Arg = MakeHexArg (Result);
1017 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1018 InsertEntry (D, X, D->IP++);
1019 } else if (D->PushEntry->RI->In.RegX != 0) {
1020 /* High byte is unknown */
1021 AddOpHigh (D, OP65_ORA);
1024 /* Remove the push and the call to the tosorax function */
1025 RemoveRemainders (D);
1027 /* We changed the sequence */
1033 static unsigned Opt_tossubax (StackOpData* D)
1034 /* Optimize the tossubax sequence if possible. Note: subtraction is not
1040 /* Check the load entry before the push. If it's a lda instruction with an
1041 * addressing mode that allows us to replace it, we may use this
1042 * location for the op and must not save the value in the zero page
1047 /* Inline the sbc */
1048 D->IP = D->OpIndex+1;
1051 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1052 InsertEntry (D, X, D->IP++);
1054 /* Must be true because of OP_RHS_LOAD */
1055 CHECK ((D->Rhs.A.Flags & LI_DIRECT) != 0);
1057 /* If the location is on the stack, we need to reload the Y register. */
1058 if ((D->Rhs.A.Flags & LI_RELOAD_Y) == 0) {
1061 CodeEntry* LoadA = D->Rhs.A.LoadEntry;
1062 X = NewCodeEntry (OP65_SBC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
1063 InsertEntry (D, X, D->IP++);
1068 const char* Arg = MakeHexArg (D->Rhs.A.Offs);
1069 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
1070 InsertEntry (D, X, D->IP++);
1073 X = NewCodeEntry (OP65_SBC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
1074 InsertEntry (D, X, D->IP++);
1077 /* In both cases, we can remove the load */
1078 D->Rhs.A.Flags |= LI_REMOVE;
1081 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
1082 InsertEntry (D, X, D->IP++);
1085 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
1086 InsertEntry (D, X, D->IP++);
1088 /* Must be true because of OP_RHS_LOAD */
1089 CHECK ((D->Rhs.X.Flags & LI_DIRECT) != 0);
1091 /* If the location is on the stack, we need to reload the Y register. */
1092 if ((D->Rhs.X.Flags & LI_RELOAD_Y) == 0) {
1095 CodeEntry* LoadX = D->Rhs.X.LoadEntry;
1096 X = NewCodeEntry (OP65_SBC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
1097 InsertEntry (D, X, D->IP++);
1102 const char* Arg = MakeHexArg (D->Rhs.X.Offs);
1103 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
1104 InsertEntry (D, X, D->IP++);
1107 X = NewCodeEntry (OP65_SBC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
1108 InsertEntry (D, X, D->IP++);
1111 /* In both cases, we can remove the load */
1112 D->Rhs.X.Flags |= LI_REMOVE;
1115 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
1116 InsertEntry (D, X, D->IP++);
1119 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
1120 InsertEntry (D, X, D->IP++);
1122 /* Remove the push and the call to the tossubax function */
1123 RemoveRemainders (D);
1125 /* We changed the sequence */
1131 static unsigned Opt_tosxorax (StackOpData* D)
1132 /* Optimize the tosxorax sequence if possible */
1136 /* Check the entry before the push. If it's a lda instruction with an
1137 * addressing mode that allows us to replace it, we may use this
1138 * location for the op and must not save the value in the zero page
1143 /* Store the value into the zeropage instead of pushing it */
1144 ReplacePushByStore (D);
1146 /* Inline the xor, low byte */
1147 D->IP = D->OpIndex + 1;
1148 AddOpLow (D, OP65_EOR);
1151 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
1152 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1153 /* Both values known, precalculate the result */
1154 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
1155 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1156 InsertEntry (D, X, D->IP++);
1157 } else if (D->PushEntry->RI->In.RegX != 0) {
1158 /* High byte is unknown */
1159 AddOpHigh (D, OP65_EOR);
1162 /* Remove the push and the call to the tosandax function */
1163 RemoveRemainders (D);
1165 /* We changed the sequence */
1171 /*****************************************************************************/
1173 /*****************************************************************************/
1177 static const OptFuncDesc FuncTable[] = {
1178 { "__bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN },
1179 { "staspidx", Opt_staspidx, REG_NONE, OP_NONE },
1180 { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE },
1181 { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE },
1182 { "tosandax", Opt_tosandax, REG_NONE, OP_NONE },
1183 { "tosorax", Opt_tosorax, REG_NONE, OP_NONE },
1185 { "tossubax", Opt_tossubax, REG_NONE, OP_NONE },
1187 { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE },
1189 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
1193 static int CmpFunc (const void* Key, const void* Func)
1194 /* Compare function for bsearch */
1196 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
1201 static const OptFuncDesc* FindFunc (const char* Name)
1202 /* Find the function with the given name. Return a pointer to the table entry
1203 * or NULL if the function was not found.
1206 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
1211 static int CmpHarmless (const void* Key, const void* Entry)
1212 /* Compare function for bsearch */
1214 return strcmp (Key, *(const char**)Entry);
1219 static int HarmlessCall (const char* Name)
1220 /* Check if this is a call to a harmless subroutine that will not interrupt
1221 * the pushax/op sequence when encountered.
1224 static const char* Tab[] = {
1239 void* R = bsearch (Name,
1241 sizeof (Tab) / sizeof (Tab[0]),
1249 static void ResetStackOpData (StackOpData* Data)
1250 /* Reset the given data structure */
1253 Data->UsedRegs = REG_NONE;
1255 ClearLoadInfo (&Data->Lhs);
1256 ClearLoadInfo (&Data->Rhs);
1258 Data->PushIndex = -1;
1264 static int PreCondOk (StackOpData* D)
1265 /* Check if the preconditions for a call to the optimizer subfunction are
1266 * satisfied. As a side effect, this function will also choose the zero page
1270 /* Check the flags */
1271 unsigned UnusedRegs = D->OptFunc->UnusedRegs;
1272 if (UnusedRegs != REG_NONE &&
1273 (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) {
1274 /* Cannot optimize */
1277 if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 &&
1278 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
1279 /* Cannot optimize */
1282 if ((D->OptFunc->Flags & OP_X_ZERO) != 0 &&
1283 D->OpEntry->RI->In.RegX != 0) {
1284 /* Cannot optimize */
1287 if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0 &&
1288 (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0)) {
1289 /* Cannot optimize */
1292 if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0 &&
1293 (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0)) {
1294 /* Cannot optimize */
1298 /* Determine the zero page locations to use */
1299 if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
1302 } else if ((D->UsedRegs & REG_SREG) == REG_NONE) {
1305 } else if ((D->UsedRegs & REG_PTR2) == REG_NONE) {
1309 /* No registers available */
1313 /* Determine if we have a basic block */
1314 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
1319 /*****************************************************************************/
1321 /*****************************************************************************/
1325 unsigned OptStackOps (CodeSeg* S)
1326 /* Optimize operations that take operands via the stack */
1328 unsigned Changes = 0; /* Number of changes in one run */
1337 } State = Initialize;
1340 /* Generate register info */
1343 /* Remember the code segment in the info struct */
1346 /* Look for a call to pushax followed by a call to some other function
1347 * that takes it's first argument on the stack, and the second argument
1348 * in the primary register.
1349 * It depends on the code between the two if we can handle/transform the
1350 * sequence, so check this code for the following list of things:
1352 * - the range must be a basic block (one entry, one exit)
1353 * - there may not be accesses to local variables with unknown
1354 * offsets (because we have to adjust these offsets).
1355 * - no subroutine calls
1358 * Since we need a zero page register later, do also check the
1359 * intermediate code for zero page use.
1362 while (I < CS_GetEntryCount (S)) {
1364 /* Get the next entry */
1365 CodeEntry* E = CS_GetEntry (S, I);
1367 /* Actions depend on state */
1371 ResetStackOpData (&Data);
1375 /* While searching, track register load insns, so we can tell
1376 * what is in a register once pushax is encountered.
1378 if (CE_IsCallTo (E, "pushax")) {
1382 /* Track load insns */
1383 TrackLoads (&Data.Lhs, E, I);
1388 /* We' found a pushax before. Search for a stack op that may
1389 * follow and in the meantime, track zeropage usage and check
1390 * for code that will disable us from translating the sequence.
1392 if (E->OPC == OP65_JSR) {
1394 /* Subroutine call: Check if this is one of the functions,
1395 * we're going to replace.
1397 Data.OptFunc = FindFunc (E->Arg);
1399 /* Remember the op index and go on */
1404 } else if (!HarmlessCall (E->Arg)) {
1405 /* A call to an unkown subroutine: We need to start
1406 * over after the last pushax. Note: This will also
1407 * happen if we encounter a call to pushax!
1413 /* Track register usage */
1414 Data.UsedRegs |= (E->Use | E->Chg);
1415 TrackLoads (&Data.Rhs, E, I);
1418 } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) {
1420 /* Too dangerous - there may be a change of a variable
1421 * within the sequence.
1427 } else if ((E->Use & REG_SP) != 0 &&
1428 (E->AM != AM65_ZP_INDY ||
1429 RegValIsUnknown (E->RI->In.RegY) ||
1430 E->RI->In.RegY < 2)) {
1432 /* If we are using the stack, and we don't have "indirect Y"
1433 * addressing mode, or the value of Y is unknown, or less
1434 * than two, we cannot cope with this piece of code. Having
1435 * an unknown value of Y means that we cannot correct the
1436 * stack offset, while having an offset less than two means
1437 * that the code works with the value on stack which is to
1445 /* Other stuff: Track register usage */
1446 Data.UsedRegs |= (E->Use | E->Chg);
1447 TrackLoads (&Data.Rhs, E, I);
1452 /* Track zero page location usage beyond this point */
1453 Data.UsedRegs |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
1455 /* Finalize the load info */
1456 FinalizeLoadInfo (&Data.Lhs, S);
1457 FinalizeLoadInfo (&Data.Rhs, S);
1459 /* If the Lhs loads do load from zeropage, we have to include
1460 * them into UsedRegs registers used. The Rhs loads have already
1463 if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) {
1464 Data.UsedRegs |= Data.Lhs.A.LoadEntry->Use;
1466 if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) {
1467 Data.UsedRegs |= Data.Lhs.X.LoadEntry->Use;
1470 /* Check the preconditions. If they aren't ok, reset the insn
1471 * pointer to the pushax and start over. We will loose part of
1472 * load tracking but at least a/x has probably lost between
1473 * pushax and here and will be tracked again when restarting.
1475 if (!PreCondOk (&Data)) {
1481 /* Adjust stack offsets to account for the upcoming removal */
1482 AdjustStackOffset (&Data, 2);
1484 /* Regenerate register info, since AdjustStackOffset changed
1489 /* Prepare the remainder of the data structure. */
1490 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
1491 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
1492 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
1493 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
1495 /* Regenerate register info, since AdjustStackOffset changed
1500 /* Call the optimizer function */
1501 Changes += Data.OptFunc->Func (&Data);
1503 /* Regenerate register info */
1517 /* Free the register info */
1520 /* Return the number of changes made */