1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2009 Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
48 /*****************************************************************************/
50 /*****************************************************************************/
54 /* Flags for the functions */
56 STOP_NONE = 0x00, /* Nothing special */
57 STOP_A_UNUSED = 0x01, /* Call only if a unused later */
58 STOP_A_KNOWN = 0x02, /* Call only if A is known */
59 STOP_X_ZERO = 0x04 /* Call only if X is zero */
62 /* Structure forward decl */
63 typedef struct StackOpData StackOpData;
65 /* Structure that describes an optimizer subfunction for a specific op */
66 typedef unsigned (*OptFunc) (StackOpData* D);
67 typedef struct OptFuncDesc OptFuncDesc;
69 const char* Name; /* Name of the replaced runtime function */
70 OptFunc Func; /* Function pointer */
71 STOP_FLAGS Flags; /* Flags */
74 /* LoadData flags set by DirectOp */
75 #define LD_DIRECT 0x01 /* Direct op may be used */
76 #define LD_RELOAD_Y 0x02 /* Reload index register Y */
77 #define LD_REMOVE 0x04 /* Load may be removed */
79 /* Structure that tells us how to load the lhs values */
80 typedef struct LoadData LoadData;
82 unsigned char Flags; /* Tells us how to load */
83 unsigned char Offs; /* Stack offset if data is on stack */
86 /* Structure that holds the needed data */
88 CodeSeg* Code; /* Pointer to code segment */
89 unsigned Flags; /* Flags to remember things */
91 /* Pointer to optimizer subfunction description */
92 const OptFuncDesc* OptFunc;
94 /* ZP register usage inside the sequence */
97 /* Several indices of insns in the code segment */
98 int LoadAIndex; /* Index of load insns, -1 = invalid */
101 int PushIndex; /* Index of call to pushax in codeseg */
102 int OpIndex; /* Index of actual operation */
104 /* Pointers to insns in the code segment */
105 CodeEntry* LoadAEntry; /* Entry that loads A or NULL */
106 CodeEntry* LoadXEntry; /* Entry that loads X or NULL */
107 CodeEntry* PrevEntry; /* Entry before the call to pushax */
108 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
109 CodeEntry* OpEntry; /* Pointer to entry with op */
110 CodeEntry* NextEntry; /* Entry after the op */
112 /* Stack offsets if the lhs is loaded from stack */
117 const char* ZPLo; /* Lo byte of zero page loc to use */
118 const char* ZPHi; /* Hi byte of zero page loc to use */
119 unsigned IP; /* Insertion point used by some routines */
124 /*****************************************************************************/
126 /*****************************************************************************/
130 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
131 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
132 * OpIndex is adjusted according to the insertions.
135 /* Walk over all entries */
136 int I = D->PushIndex + 1;
137 while (I < D->OpIndex) {
139 CodeEntry* E = CS_GetEntry (D->Code, I);
141 int NeedCorrection = 0;
142 if ((E->Use & REG_SP) != 0) {
144 /* Check for some things that should not happen */
145 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
146 CHECK (strcmp (E->Arg, "sp") == 0);
148 /* We need to correct this one */
151 } else if (CE_IsCallTo (E, "ldaxysp")) {
153 /* We need to correct this one */
158 if (NeedCorrection) {
160 /* Get the code entry before this one. If it's a LDY, adjust the
163 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
164 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
166 /* The Y load is just before the stack access, adjust it */
167 CE_SetNumArg (P, P->Num - Offs);
171 /* Insert a new load instruction before the stack access */
172 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
173 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
174 CS_InsertEntry (D->Code, X, I++);
176 /* One more inserted entries */
181 /* If we need the value of Y later, be sure to reload it */
182 if (RegYUsed (D->Code, I+1)) {
183 const char* Arg = MakeHexArg (E->RI->In.RegY);
184 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
185 CS_InsertEntry (D->Code, X, I+1);
187 /* One more inserted entries */
190 /* Skip this instruction in the next round */
202 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
203 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
204 * be adjusted by this function.
207 /* Insert the entry into the code segment */
208 CS_InsertEntry (D->Code, E, Index);
210 /* Adjust the indices if necessary */
211 if (D->PushEntry && Index <= D->PushIndex) {
214 if (D->OpEntry && Index <= D->OpIndex) {
221 static void DelEntry (StackOpData* D, int Index)
222 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
223 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
226 /* Delete the entry from the code segment */
227 CS_DelEntry (D->Code, Index);
229 /* Adjust the indices if necessary */
230 if (Index < D->PushIndex) {
232 } else if (Index == D->PushIndex) {
235 if (Index < D->OpIndex) {
237 } else if (Index == D->OpIndex) {
244 static void CheckOneDirectOp (CodeEntry* E, LoadData* L, unsigned char Offs)
245 /* Check if the given entry is a lda instruction with an addressing mode
246 * that allows us to replace it by another operation (like ora). If so, we may
247 * use this location for the or and must not save the value in the zero
251 /* Check the load entry */
253 /* Must check the call first since addressing mode is ABS, so second
254 * "if" will catch otherwise.
256 if (CE_IsCallTo (E, "ldaxysp")) {
257 /* Same as single loads from stack. Since we must distinguish
258 * between A and X here, the necessary offset is passed to the
259 * function as a parameter.
261 L->Offs = (unsigned char) E->RI->In.RegY - Offs;
262 L->Flags |= (LD_DIRECT | LD_RELOAD_Y);
263 } else if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
264 /* These insns are all ok and replaceable */
265 L->Flags |= LD_DIRECT;
266 } else if (E->AM == AM65_ZP_INDY &&
267 RegValIsKnown (E->RI->In.RegY) &&
268 strcmp (E->Arg, "sp") == 0) {
269 /* A load from the stack with known offset is also ok, but in this
270 * case we must reload the index register later. Please note that
271 * a load indirect via other zero page locations is not ok, since
272 * these locations may change between the push and the actual
275 L->Offs = (unsigned char) E->RI->In.RegY;
276 L->Flags |= (LD_DIRECT | LD_RELOAD_Y);
283 static void CheckDirectOp (StackOpData* D)
284 /* Check if the given entry is a lda instruction with an addressing mode
285 * that allows us to replace it by another operation (like ora). If so, we may
286 * use this location for the or and must not save the value in the zero
290 /* Check flags for A and X load instructions */
291 CheckOneDirectOp (D->LoadAEntry, &D->AData, 1);
292 CheckOneDirectOp (D->LoadXEntry, &D->XData, 0);
297 static void ReplacePushByStore (StackOpData* D)
298 /* Replace the call to the push subroutine by a store into the zero page
299 * location (actually, the push is not replaced, because we need it for
300 * later, but the name is still ok since the push will get removed at the
301 * end of each routine).
306 /* Store the value into the zeropage instead of pushing it. Check high
307 * byte first so that the store is later in A/X order.
309 if ((D->XData.Flags & LD_DIRECT) == 0) {
310 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
311 InsertEntry (D, X, D->PushIndex+1);
313 if ((D->AData.Flags & LD_DIRECT) == 0) {
314 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
315 InsertEntry (D, X, D->PushIndex+1);
321 static void AddOpLow (StackOpData* D, opc_t OPC)
322 /* Add an op for the low byte of an operator. This function honours the
323 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
324 * All code is inserted at the current insertion point.
329 if ((D->AData.Flags & LD_DIRECT) != 0) {
330 /* Op with a variable location. If the location is on the stack, we
331 * need to reload the Y register.
333 if ((D->AData.Flags & LD_RELOAD_Y) == 0) {
336 CodeEntry* LoadA = D->LoadAEntry;
337 X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
338 InsertEntry (D, X, D->IP++);
343 const char* Arg = MakeHexArg (D->AData.Offs);
344 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
345 InsertEntry (D, X, D->IP++);
348 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
349 InsertEntry (D, X, D->IP++);
353 /* In both cases, we can remove the load */
354 D->AData.Flags |= LD_REMOVE;
358 /* Op with temp storage */
359 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
360 InsertEntry (D, X, D->IP++);
367 static void AddOpHigh (StackOpData* D, opc_t OPC)
368 /* Add an op for the high byte of an operator. Special cases (constant values
369 * or similar) have to be checked separately, the function covers only the
370 * generic case. Code is inserted at the insertion point.
376 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
377 InsertEntry (D, X, D->IP++);
380 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
381 InsertEntry (D, X, D->IP++);
383 if ((D->XData.Flags & LD_DIRECT) != 0) {
385 if ((D->XData.Flags & LD_RELOAD_Y) == 0) {
388 CodeEntry* LoadX = D->LoadXEntry;
389 X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
390 InsertEntry (D, X, D->IP++);
395 const char* Arg = MakeHexArg (D->XData.Offs);
396 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
397 InsertEntry (D, X, D->IP++);
400 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
401 InsertEntry (D, X, D->IP++);
404 /* In both cases, we can remove the load */
405 D->XData.Flags |= LD_REMOVE;
409 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
410 InsertEntry (D, X, D->IP++);
414 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
415 InsertEntry (D, X, D->IP++);
418 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
419 InsertEntry (D, X, D->IP++);
424 static void RemoveRemainders (StackOpData* D)
425 /* Remove the code that is unnecessary after translation of the sequence */
427 /* Remove the push and the operator routine */
428 DelEntry (D, D->OpIndex);
429 DelEntry (D, D->PushIndex);
431 /* Remove the register loads before the push. Beware: There may only be
434 if (D->LoadAIndex >= 0 && D->LoadAIndex == D->LoadXIndex) {
435 /* Common load routine */
436 if ((D->AData.Flags & D->XData.Flags) & LD_REMOVE) {
437 /* Both say: remove */
438 DelEntry (D, D->LoadAIndex);
440 } else if (D->LoadAIndex >= 0 && (D->AData.Flags & LD_REMOVE)) {
441 DelEntry (D, D->LoadAIndex);
442 } else if (D->LoadXIndex >= 0 && (D->XData.Flags & LD_REMOVE)) {
443 DelEntry (D, D->LoadXIndex);
449 static int IsRegVar (StackOpData* D)
450 /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi
451 * in the given StackOpData struct by the variable and return true. Otherwise
452 * leave D untouched and return false.
455 CodeEntry* LoadA = D->LoadAEntry;
456 CodeEntry* LoadX = D->LoadXEntry;
459 /* Must have both load insns */
460 if (LoadA == 0 || LoadX == 0) {
464 /* Must be loads from zp */
465 if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) {
469 /* Must be the same zp loc with high byte in X */
470 Len = strlen (LoadA->Arg);
471 if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 ||
472 strcmp (LoadX->Arg + Len, "+1") != 0) {
476 /* Use the zero page location directly */
477 D->ZPLo = LoadA->Arg;
478 D->ZPHi = LoadX->Arg;
484 /*****************************************************************************/
485 /* Actual optimization functions */
486 /*****************************************************************************/
490 static unsigned Opt___bzero (StackOpData* D)
491 /* Optimize the __bzero sequence if possible */
497 /* Check if we're using a register variable */
499 /* Store the value into the zeropage instead of pushing it */
500 ReplacePushByStore (D);
503 /* If the return value of __bzero is used, we have to add code to reload
504 * a/x from the pointer variable.
506 if (RegAXUsed (D->Code, D->OpIndex+1)) {
507 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
508 InsertEntry (D, X, D->OpIndex+1);
509 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
510 InsertEntry (D, X, D->OpIndex+2);
513 /* X is always zero, A contains the size of the data area to zero.
514 * Note: A may be zero, in which case the operation is null op.
516 if (D->OpEntry->RI->In.RegA != 0) {
519 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
520 InsertEntry (D, X, D->OpIndex+1);
522 /* The value of A is known */
523 if (D->OpEntry->RI->In.RegA <= 0x81) {
525 /* Loop using the sign bit */
528 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
529 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
530 InsertEntry (D, X, D->OpIndex+2);
533 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
534 InsertEntry (D, X, D->OpIndex+3);
535 L = CS_GenLabel (D->Code, X);
538 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
539 InsertEntry (D, X, D->OpIndex+4);
542 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
543 InsertEntry (D, X, D->OpIndex+5);
547 /* Loop using an explicit compare */
550 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
551 InsertEntry (D, X, D->OpIndex+2);
554 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
555 InsertEntry (D, X, D->OpIndex+3);
556 L = CS_GenLabel (D->Code, X);
559 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
560 InsertEntry (D, X, D->OpIndex+4);
563 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
564 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
565 InsertEntry (D, X, D->OpIndex+5);
568 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
569 InsertEntry (D, X, D->OpIndex+6);
574 /* Remove the push and the call to the __bzero function */
575 RemoveRemainders (D);
577 /* We changed the sequence */
583 static unsigned Opt_staspidx (StackOpData* D)
584 /* Optimize the staspidx sequence if possible */
588 /* Check if we're using a register variable */
590 /* Store the value into the zeropage instead of pushing it */
591 ReplacePushByStore (D);
594 /* Replace the store subroutine call by a direct op */
595 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
596 InsertEntry (D, X, D->OpIndex+1);
598 /* Remove the push and the call to the staspidx function */
599 RemoveRemainders (D);
601 /* We changed the sequence */
607 static unsigned Opt_staxspidx (StackOpData* D)
608 /* Optimize the staxspidx sequence if possible */
612 /* Check if we're using a register variable */
614 /* Store the value into the zeropage instead of pushing it */
615 ReplacePushByStore (D);
618 /* Inline the store */
621 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
622 InsertEntry (D, X, D->OpIndex+1);
624 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
625 /* Value of Y is known */
626 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
627 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
629 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
631 InsertEntry (D, X, D->OpIndex+2);
633 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
634 /* Value of X is known */
635 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
636 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
639 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
641 InsertEntry (D, X, D->OpIndex+3);
644 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
645 InsertEntry (D, X, D->OpIndex+4);
647 /* If we remove staspidx, we must restore the Y register to what the
648 * function would return.
650 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
651 InsertEntry (D, X, D->OpIndex+5);
653 /* Remove the push and the call to the staxspidx function */
654 RemoveRemainders (D);
656 /* We changed the sequence */
662 static unsigned Opt_tosaddax (StackOpData* D)
663 /* Optimize the tosaddax sequence if possible */
668 /* We need the entry behind the add */
669 CHECK (D->NextEntry != 0);
671 /* Check if the X register is known and zero when the add is done, and
672 * if the add is followed by
675 * jsr ldauidx ; or ldaidx
677 * If this is true, the addition does actually add an offset to a pointer
678 * before it is dereferenced. Since both subroutines take an offset in Y,
679 * we can pass the offset (instead of #$00) and remove the addition
682 if (D->OpEntry->RI->In.RegX == 0 &&
683 D->NextEntry->OPC == OP65_LDY &&
684 CE_IsKnownImm (D->NextEntry, 0) &&
685 !CE_HasLabel (D->NextEntry) &&
686 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
687 (CE_IsCallTo (N, "ldauidx") ||
688 CE_IsCallTo (N, "ldaidx"))) {
690 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
692 /* Store the value into the zeropage instead of pushing it */
693 ReplacePushByStore (D);
695 /* Replace the ldy by a tay. Be sure to create the new entry before
696 * deleting the ldy, since we will reference the line info from this
699 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
700 DelEntry (D, D->OpIndex + 1);
701 InsertEntry (D, X, D->OpIndex + 1);
703 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
704 * the ptr is in the zero page location, we just need to load from
705 * the pointer, and fix X in case of ldaidx.
707 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
708 DelEntry (D, D->OpIndex + 2);
709 InsertEntry (D, X, D->OpIndex + 2);
714 /* Add sign extension - N is unused now */
715 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
717 L = CS_GenLabel (D->Code, N);
719 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
720 InsertEntry (D, X, D->OpIndex + 3);
722 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
723 InsertEntry (D, X, D->OpIndex + 4);
728 /* Check the entry before the push. If it's a lda instruction with an
729 * addressing mode that allows us to replace it, we may use this
730 * location for the op and must not save the value in the zero page
735 /* Store the value into the zeropage instead of pushing it */
736 ReplacePushByStore (D);
739 D->IP = D->OpIndex+1;
742 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
743 InsertEntry (D, X, D->IP++);
746 AddOpLow (D, OP65_ADC);
749 if (D->PushEntry->RI->In.RegX == 0) {
750 /* The high byte is the value in X plus the carry */
751 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
752 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
753 InsertEntry (D, X, D->IP++);
754 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
755 InsertEntry (D, X, D->IP++);
756 } else if (D->OpEntry->RI->In.RegX == 0) {
757 /* The high byte is that of the first operand plus carry */
759 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
760 /* Value of first op high byte is known */
761 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
762 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
764 /* Value of first op high byte is unknown */
765 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
767 InsertEntry (D, X, D->IP++);
770 L = CS_GenLabel (D->Code, D->NextEntry);
771 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
772 InsertEntry (D, X, D->IP++);
775 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
776 InsertEntry (D, X, D->IP++);
778 /* High byte is unknown */
779 AddOpHigh (D, OP65_ADC);
783 /* Remove the push and the call to the tosaddax function */
784 RemoveRemainders (D);
786 /* We changed the sequence */
792 static unsigned Opt_tosandax (StackOpData* D)
793 /* Optimize the tosandax sequence if possible */
797 /* Check the entry before the push. If it's a lda instruction with an
798 * addressing mode that allows us to replace it, we may use this
799 * location for the op and must not save the value in the zero page
804 /* Store the value into the zeropage instead of pushing it */
805 ReplacePushByStore (D);
807 /* Inline the and, low byte */
808 D->IP = D->OpIndex + 1;
809 AddOpLow (D, OP65_AND);
812 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
813 /* The high byte is zero */
814 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
815 InsertEntry (D, X, D->IP++);
817 /* High byte is unknown */
818 AddOpHigh (D, OP65_AND);
821 /* Remove the push and the call to the tosandax function */
822 RemoveRemainders (D);
824 /* We changed the sequence */
830 static unsigned Opt_tosorax (StackOpData* D)
831 /* Optimize the tosorax sequence if possible */
835 /* Check the entry before the push. If it's a lda instruction with an
836 * addressing mode that allows us to replace it, we may use this
837 * location for the op and must not save the value in the zero page
842 /* Store the value into the zeropage instead of pushing it */
843 ReplacePushByStore (D);
845 /* Inline the or, low byte */
846 D->IP = D->OpIndex + 1;
847 AddOpLow (D, OP65_ORA);
850 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
851 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
852 /* Both values known, precalculate the result */
853 unsigned char Result = D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX;
854 const char* Arg = MakeHexArg (Result);
855 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
856 InsertEntry (D, X, D->IP++);
857 } else if (D->PushEntry->RI->In.RegX != 0) {
858 /* High byte is unknown */
859 AddOpHigh (D, OP65_ORA);
862 /* Remove the push and the call to the tosorax function */
863 RemoveRemainders (D);
865 /* We changed the sequence */
871 static unsigned Opt_tossubax (StackOpData* D)
872 /* Optimize the tossubax sequence if possible */
876 /* Check the load entry before the push. If it's a lda instruction with an
877 * addressing mode that allows us to replace it, we may use this
878 * location for the op and must not save the value in the zero page
883 /* Store the value into the zeropage instead of pushing it */
884 ReplacePushByStore (D);
887 D->IP = D->OpIndex+1;
890 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
891 InsertEntry (D, X, D->IP++);
894 AddOpLow (D, OP65_SBC);
897 AddOpHigh (D, OP65_SBC);
899 /* Remove the push and the call to the tosaddax function */
900 RemoveRemainders (D);
902 /* We changed the sequence */
908 static unsigned Opt_tosxorax (StackOpData* D)
909 /* Optimize the tosxorax sequence if possible */
913 /* Check the entry before the push. If it's a lda instruction with an
914 * addressing mode that allows us to replace it, we may use this
915 * location for the op and must not save the value in the zero page
920 /* Store the value into the zeropage instead of pushing it */
921 ReplacePushByStore (D);
923 /* Inline the xor, low byte */
924 D->IP = D->OpIndex + 1;
925 AddOpLow (D, OP65_EOR);
928 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
929 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
930 /* Both values known, precalculate the result */
931 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
932 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
933 InsertEntry (D, X, D->IP++);
934 } else if (D->PushEntry->RI->In.RegX != 0) {
935 /* High byte is unknown */
936 AddOpHigh (D, OP65_EOR);
939 /* Remove the push and the call to the tosandax function */
940 RemoveRemainders (D);
942 /* We changed the sequence */
948 /*****************************************************************************/
950 /*****************************************************************************/
954 static const OptFuncDesc FuncTable[] = {
955 { "__bzero", Opt___bzero, STOP_X_ZERO | STOP_A_KNOWN },
956 { "staspidx", Opt_staspidx, STOP_NONE },
957 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
958 { "tosaddax", Opt_tosaddax, STOP_NONE },
959 { "tosandax", Opt_tosandax, STOP_NONE },
960 { "tosorax", Opt_tosorax, STOP_NONE },
961 { "tossubax", Opt_tossubax, STOP_NONE },
962 { "tosxorax", Opt_tosxorax, STOP_NONE },
964 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
968 static int CmpFunc (const void* Key, const void* Func)
969 /* Compare function for bsearch */
971 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
976 static const OptFuncDesc* FindFunc (const char* Name)
977 /* Find the function with the given name. Return a pointer to the table entry
978 * or NULL if the function was not found.
981 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
986 static int CmpHarmless (const void* Key, const void* Entry)
987 /* Compare function for bsearch */
989 return strcmp (Key, *(const char**)Entry);
994 static int HarmlessCall (const char* Name)
995 /* Check if this is a call to a harmless subroutine that will not interrupt
996 * the pushax/op sequence when encountered.
999 static const char* Tab[] = {
1013 void* R = bsearch (Name,
1015 sizeof (Tab) / sizeof (Tab[0]),
1023 static void ResetStackOpData (StackOpData* Data)
1024 /* Reset the given data structure */
1026 Data->AData.Flags = 0;
1027 Data->XData.Flags = 0;
1030 Data->LoadAIndex = -1;
1031 Data->LoadXIndex = -1;
1032 Data->LoadYIndex = -1;
1033 Data->PushIndex = -1;
1036 Data->LoadAEntry = 0;
1037 Data->LoadXEntry = 0;
1039 Data->UsedRegs = REG_NONE;
1044 static int PreCondOk (StackOpData* D)
1045 /* Check if the preconditions for a call to the optimizer subfunction are
1046 * satisfied. As a side effect, this function will also choose the zero page
1050 /* Check the flags */
1051 if ((D->OptFunc->Flags & STOP_A_UNUSED) != 0 &&
1052 RegAUsed (D->Code, D->OpIndex+1)) {
1053 /* Cannot optimize */
1055 } else if ((D->OptFunc->Flags & STOP_A_KNOWN) != 0 &&
1056 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
1057 /* Cannot optimize */
1059 } else if ((D->OptFunc->Flags & STOP_X_ZERO) != 0 &&
1060 D->OpEntry->RI->In.RegX != 0) {
1061 /* Cannot optimize */
1065 /* Determine the zero page locations to use */
1066 if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
1069 } else if ((D->UsedRegs & REG_SREG) == REG_NONE) {
1072 } else if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
1075 } else if ((D->UsedRegs & REG_PTR2) == REG_NONE) {
1079 /* No registers available */
1083 /* Determine if we have a basic block */
1084 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
1089 /*****************************************************************************/
1091 /*****************************************************************************/
1095 unsigned OptStackOps (CodeSeg* S)
1096 /* Optimize operations that take operands via the stack */
1098 unsigned Changes = 0; /* Number of changes in one run */
1106 } State = Searching;
1109 /* Generate register info */
1114 ResetStackOpData (&Data);
1116 /* Look for a call to pushax followed by a call to some other function
1117 * that takes it's first argument on the stack, and the second argument
1118 * in the primary register.
1119 * It depends on the code between the two if we can handle/transform the
1120 * sequence, so check this code for the following list of things:
1122 * - the range must be a basic block (one entry, one exit)
1123 * - there may not be accesses to local variables with unknown
1124 * offsets (because we have to adjust these offsets).
1125 * - no subroutine calls
1128 * Since we need a zero page register later, do also check the
1129 * intermediate code for zero page use.
1132 while (I < CS_GetEntryCount (S)) {
1134 /* Get the next entry */
1135 CodeEntry* E = CS_GetEntry (S, I);
1137 /* Actions depend on state */
1141 /* While searching, track register load insns, so we can tell
1142 * what is in a register once pushax is encountered.
1144 if (CE_IsCallTo (E, "pushax")) {
1147 } else if (E->Info & OF_LOAD) {
1148 if (E->Chg & REG_A) {
1149 Data.LoadAIndex = I;
1151 if (E->Chg & REG_X) {
1152 Data.LoadXIndex = I;
1154 if (E->Chg & REG_Y) {
1155 Data.LoadYIndex = I;
1157 } else if (E->Info & OF_XFR) {
1159 case OP65_TAX: Data.LoadXIndex = Data.LoadAIndex; break;
1160 case OP65_TAY: Data.LoadYIndex = Data.LoadAIndex; break;
1161 case OP65_TXA: Data.LoadAIndex = Data.LoadXIndex; break;
1162 case OP65_TYA: Data.LoadAIndex = Data.LoadYIndex; break;
1165 } else if (CE_IsCallTo (E, "ldaxysp")) {
1166 /* Both registers set */
1167 Data.LoadAIndex = I;
1168 Data.LoadXIndex = I;
1170 if (E->Chg & REG_A) {
1171 Data.LoadAIndex = -1;
1173 if (E->Chg & REG_X) {
1174 Data.LoadXIndex = -1;
1176 if (E->Chg & REG_Y) {
1177 Data.LoadYIndex = -1;
1183 /* We' found a pushax before. Search for a stack op that may
1184 * follow and in the meantime, track zeropage usage and check
1185 * for code that will disable us from translating the sequence.
1187 if (E->OPC == OP65_JSR) {
1189 /* Subroutine call: Check if this is one of the functions,
1190 * we're going to replace.
1192 Data.OptFunc = FindFunc (E->Arg);
1194 /* Remember the op index and go on */
1199 } else if (HarmlessCall (E->Arg)) {
1200 /* Track zeropage register usage */
1201 Data.UsedRegs |= (E->Use | E->Chg);
1203 /* A call to an unkown subroutine: We need to start
1204 * over after the last pushax. Note: This will also
1205 * happen if we encounter a call to pushax!
1208 ResetStackOpData (&Data);
1213 } else if ((E->Use & REG_SP) != 0 &&
1214 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) ||
1215 E->RI->In.RegY < 2)) {
1217 /* If we are using the stack, and we don't have "indirect Y"
1218 * addressing mode, or the value of Y is unknown, or less
1219 * than two, we cannot cope with this piece of code. Having
1220 * an unknown value of Y means that we cannot correct the
1221 * stack offset, while having an offset less than two means
1222 * that the code works with the value on stack which is to
1226 ResetStackOpData (&Data);
1231 /* Other stuff: Track zeropage register usage */
1232 Data.UsedRegs |= (E->Use | E->Chg);
1237 /* Track zero page location usage beyond this point */
1238 Data.UsedRegs |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
1240 /* Check the preconditions. If they aren't ok, reset the insn
1241 * pointer to the pushax and start over. We will loose part of
1242 * load tracking but at least a/x has probably lost between
1243 * pushax and here and will be tracked again when restarting.
1245 if (!PreCondOk (&Data)) {
1247 ResetStackOpData (&Data);
1252 /* Preconditions are ok, so call the optimizer function */
1254 /* Adjust stack offsets to account for the upcoming removal */
1255 AdjustStackOffset (&Data, 2);
1257 /* Prepare the remainder of the data structure */
1258 if (Data.LoadAIndex >= 0) {
1259 Data.LoadAEntry = CS_GetEntry (S, Data.LoadAIndex);
1261 if (Data.LoadXIndex >= 0) {
1262 Data.LoadXEntry = CS_GetEntry (S, Data.LoadXIndex);
1264 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
1265 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
1266 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
1267 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
1269 /* Call the optimizer function */
1270 Changes += Data.OptFunc->Func (&Data);
1272 /* Regenerate register info */
1276 ResetStackOpData (&Data);
1287 /* Free the register info */
1290 /* Return the number of changes made */