1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2003 Ullrich von Bassewitz */
10 /* Römerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
46 /*****************************************************************************/
48 /*****************************************************************************/
52 /* Structure that holds the needed data */
53 typedef struct StackOpData StackOpData;
55 CodeSeg* Code; /* Pointer to code segment */
56 unsigned Flags; /* Flags to remember things */
57 unsigned PushIndex; /* Index of call to pushax in codeseg */
58 unsigned OpIndex; /* Index of actual operation */
59 CodeEntry* PrevEntry; /* Entry before the call to pushax */
60 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
61 CodeEntry* OpEntry; /* Pointer to entry with op */
62 CodeEntry* NextEntry; /* Entry after the op */
63 const char* ZPLo; /* Lo byte of zero page loc to use */
64 const char* ZPHi; /* Hi byte of zero page loc to use */
65 unsigned IP; /* Insertion point used by some routines */
68 /* Flags returned by DirectOp */
69 #define OP_DIRECT 0x01 /* Direct op may be used */
70 #define OP_RELOAD_Y 0x02 /* Must reload index register Y */
74 /*****************************************************************************/
76 /*****************************************************************************/
80 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
82 /* Adjust the offset for all stack accesses in the range Start to Stop, both
83 * inclusive. The function returns the number of instructions that have been
87 /* Number of inserted instructions */
88 unsigned Inserted = 0;
90 /* Walk over all entries */
94 CodeEntry* E = CS_GetEntry (S, I);
96 int NeedCorrection = 0;
97 if ((E->Use & REG_SP) != 0) {
99 /* Check for some things that should not happen */
100 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
101 CHECK (strcmp (E->Arg, "sp") == 0);
103 /* We need to correct this one */
106 } else if (CE_IsCallTo (E, "ldaxysp")) {
108 /* We need to correct this one */
113 if (NeedCorrection) {
117 /* If the Y register value is needed later, we have to reload the
118 * register after changing it.
120 int NeedY = RegYUsed (S, I+1);
121 unsigned YVal = E->RI->In.RegY;
123 /* Get the code entry before this one. If it's a LDY, adjust the
126 P = CS_GetPrevEntry (S, I);
127 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
129 /* The Y load is just before the stack access, adjust it */
130 CE_SetNumArg (P, P->Num - Offs);
134 /* Insert a new load instruction before the stack access */
135 const char* Arg = MakeHexArg (YVal - Offs);
136 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
137 CS_InsertEntry (S, X, I);
139 /* One more inserted entry */
143 /* Be sure to skip the stack access for the next round */
148 /* If we need the value of Y later, be sure to reload it */
150 const char* Arg = MakeHexArg (YVal);
151 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
152 CS_InsertEntry (S, X, I+1);
154 /* One more inserted entry */
158 /* Skip this instruction in the next round */
167 /* Return the number of inserted entries */
173 static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index)
174 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
175 * be adjusted by this function.
178 /* Insert the entry into the code segment */
179 CS_InsertEntry (D->Code, E, Index);
181 /* Adjust the indices if necessary */
182 if (D->PushEntry && Index <= D->PushIndex) {
185 if (D->OpEntry && Index <= D->OpIndex) {
192 static void DelEntry (StackOpData* D, unsigned Index)
193 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
194 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
197 /* Delete the entry from the code segment */
198 CS_DelEntry (D->Code, Index);
200 /* Adjust the indices if necessary */
201 if (Index < D->PushIndex) {
203 } else if (Index == D->PushIndex) {
206 if (Index < D->OpIndex) {
208 } else if (Index == D->OpIndex) {
215 static void CheckDirectOp (StackOpData* D)
216 /* Check if the given entry is a lda instruction with an addressing mode
217 * that allows us to replace it by another operation (like ora). If so, we may
218 * use this location for the or and need not save the value in the zero
222 /* We need the entry before the push */
224 CHECK ((E = D->PrevEntry) != 0);
226 if (E->OPC == OP65_LDA) {
227 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
228 /* These insns are all ok and replaceable */
229 D->Flags |= OP_DIRECT;
230 } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY) &&
231 strcmp (E->Arg, "sp") == 0) {
232 /* A load from the stack with known offset is also ok, but in this
233 * case we must reload the index register later. Please note that
234 * a load indirect via other zero page locations is not ok, since
235 * these locations may change between the push and the actual
238 D->Flags |= (OP_DIRECT | OP_RELOAD_Y);
245 static void ReplacePushByStore (StackOpData* D)
246 /* Replace the call to the push subroutine by a store into the zero page
247 * location (actually, the push is not replaced, because we need it for
248 * later, but the name is still ok since the push will get removed at the
249 * end of each routine).
254 /* Store the value into the zeropage instead of pushing it */
255 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
256 InsertEntry (D, X, D->PushIndex+1);
257 if ((D->Flags & OP_DIRECT) == 0) {
258 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
259 InsertEntry (D, X, D->PushIndex+1);
265 static void AddOpLow (StackOpData* D, opc_t OPC)
266 /* Add an op for the low byte of an operator. This function honours the
267 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
268 * All code is inserted at the current insertion point.
273 if ((D->Flags & OP_DIRECT) != 0) {
274 /* Op with a variable location. If the location is on the stack, we
275 * need to reload the Y register.
277 if ((D->Flags & OP_RELOAD_Y) != 0) {
278 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
279 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
280 InsertEntry (D, X, D->IP++);
282 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
284 /* Op with temp storage */
285 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
287 InsertEntry (D, X, D->IP++);
292 static void AddOpHigh (StackOpData* D, opc_t OPC)
293 /* Add an op for the high byte of an operator. Special cases (constant values
294 * or similar) have to be checked separately; the function covers only the
295 * generic case. Code is inserted at the insertion point.
300 /* High byte is unknown */
301 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
302 InsertEntry (D, X, D->IP++);
303 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
304 InsertEntry (D, X, D->IP++);
305 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
306 InsertEntry (D, X, D->IP++);
307 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
308 InsertEntry (D, X, D->IP++);
309 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
310 InsertEntry (D, X, D->IP++);
315 static void RemovePushAndOp (StackOpData* D)
316 /* Remove the call to pushax and the call to the operator subroutine */
318 DelEntry (D, D->OpIndex);
319 DelEntry (D, D->PushIndex);
324 static const char* IsRegVar (const StackOpData* D)
325 /* If the value pushed is that of a register variable, return the name of the
326 * entry in the register bank. Otherwise return NULL. The pattern checked is an LDA at PushIndex-2 and an LDX directly before the push, both with an argument of the form "regbank+<digit>".
331 if (D->PushIndex >= 2 &&
332 (P = D->PrevEntry) != 0 &&
333 P->OPC == OP65_LDX &&
335 strncmp (P->Arg, "regbank+", 8) == 0 && /* Compare all 8 chars of the prefix, '+' included, so Arg[8] is known to be inside the string */
336 isdigit ((unsigned char) P->Arg[8]) &&
337 (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 &&
338 P->OPC == OP65_LDA &&
340 strncmp (P->Arg, "regbank+", 8) == 0 &&
341 isdigit ((unsigned char) P->Arg[8])) {
342 /* Ok, it loads the register variable */
351 /*****************************************************************************/
352 /* Actual optimization functions */
353 /*****************************************************************************/
357 static unsigned Opt_staspidx (StackOpData* D)
358 /* Optimize the staspidx sequence if possible */
363 /* Check if we're using a register variable */
364 if ((ZPLo = IsRegVar (D)) == 0) {
366 /* Store the value into the zeropage instead of pushing it */
367 ReplacePushByStore (D);
369 /* Use the given zero page loc */
373 /* Replace the store subroutine call by a direct op */
374 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
375 InsertEntry (D, X, D->OpIndex+1);
377 /* Remove the push and the call to the staspidx function */
380 /* We changed the sequence */
386 static unsigned Opt_staxspidx (StackOpData* D)
387 /* Optimize the staxspidx sequence if possible */
392 /* Check if we're using a register variable */
393 if ((ZPLo = IsRegVar (D)) == 0) {
395 /* Store the value into the zeropage instead of pushing it */
396 ReplacePushByStore (D);
398 /* Use the given zero page loc */
402 /* Inline the store */
403 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
404 InsertEntry (D, X, D->OpIndex+1);
405 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
406 InsertEntry (D, X, D->OpIndex+2);
407 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
408 /* Value of X is known */
409 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
410 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
413 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
415 InsertEntry (D, X, D->OpIndex+3);
416 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
417 InsertEntry (D, X, D->OpIndex+4);
419 /* Remove the push and the call to the staxspidx function */
422 /* We changed the sequence */
428 static unsigned Opt_tosaddax (StackOpData* D)
429 /* Optimize the tosaddax sequence if possible */
434 /* We need the entry behind the add */
435 CHECK (D->NextEntry != 0);
437 /* Check the entry before the push. If it's a lda instruction with an
438 * addressing mode that allows us to replace it, we may use this
439 * location for the op and need not save the value in the zero page
444 /* Store the value into the zeropage instead of pushing it */
445 ReplacePushByStore (D);
448 D->IP = D->OpIndex+1;
449 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
450 InsertEntry (D, X, D->IP++);
453 AddOpLow (D, OP65_ADC);
456 if (D->PushEntry->RI->In.RegX == 0) {
457 /* The high byte is the value in X plus the carry */
458 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
459 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
460 InsertEntry (D, X, D->IP++);
461 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
462 InsertEntry (D, X, D->IP++);
463 } else if (D->OpEntry->RI->In.RegX == 0) {
464 /* The high byte is that of the first operand plus carry */
466 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
467 /* Value of first op high byte is known */
468 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
469 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
471 /* Value of first op high byte is unknown */
472 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
474 InsertEntry (D, X, D->IP++);
475 L = CS_GenLabel (D->Code, D->NextEntry);
476 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
477 InsertEntry (D, X, D->IP++);
478 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
479 InsertEntry (D, X, D->IP++);
481 /* High byte is unknown */
482 AddOpHigh (D, OP65_ADC);
485 /* Remove the push and the call to the tosaddax function */
488 /* We changed the sequence */
494 static unsigned Opt_tosandax (StackOpData* D)
495 /* Optimize the tosandax sequence if possible */
499 /* Check the entry before the push. If it's a lda instruction with an
500 * addressing mode that allows us to replace it, we may use this
501 * location for the op and need not save the value in the zero page
506 /* Store the value into the zeropage instead of pushing it */
507 ReplacePushByStore (D);
509 /* Inline the and, low byte */
510 D->IP = D->OpIndex + 1;
511 AddOpLow (D, OP65_AND);
514 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
515 /* The high byte is zero */
516 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
517 InsertEntry (D, X, D->IP++);
519 /* High byte is unknown */
520 AddOpHigh (D, OP65_AND);
523 /* Remove the push and the call to the tosandax function */
526 /* We changed the sequence */
532 static unsigned Opt_tosorax (StackOpData* D)
533 /* Optimize the tosorax sequence if possible */
537 /* Check the entry before the push. If it's a lda instruction with an
538 * addressing mode that allows us to replace it, we may use this
539 * location for the op and need not save the value in the zero page
544 /* Store the value into the zeropage instead of pushing it */
545 ReplacePushByStore (D);
547 /* Inline the or, low byte */
548 D->IP = D->OpIndex + 1;
549 AddOpLow (D, OP65_ORA);
552 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
553 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
554 /* Both values known, precalculate the result */
555 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX);
556 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
557 InsertEntry (D, X, D->IP++);
558 } else if (D->PushEntry->RI->In.RegX != 0) {
559 /* High byte is unknown */
560 AddOpHigh (D, OP65_ORA);
563 /* Remove the push and the call to the tosorax function */
566 /* We changed the sequence */
572 static unsigned Opt_tosxorax (StackOpData* D)
573 /* Optimize the tosxorax sequence if possible */
577 /* Check the entry before the push. If it's a lda instruction with an
578 * addressing mode that allows us to replace it, we may use this
579 * location for the op and need not save the value in the zero page
584 /* Store the value into the zeropage instead of pushing it */
585 ReplacePushByStore (D);
587 /* Inline the xor, low byte */
588 D->IP = D->OpIndex + 1;
589 AddOpLow (D, OP65_EOR);
592 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
593 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
594 /* Both values known, precalculate the result */
595 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
596 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
597 InsertEntry (D, X, D->IP++);
598 } else if (D->PushEntry->RI->In.RegX != 0) {
599 /* High byte is unknown */
600 AddOpHigh (D, OP65_EOR);
603 /* Remove the push and the call to the tosxorax function */
606 /* We changed the sequence */
612 /*****************************************************************************/
614 /*****************************************************************************/
618 /* Flags for the functions */
620 STOP_NONE, /* Nothing special */
621 STOP_A_UNUSED /* Call only if a unused later */
625 typedef unsigned (*OptFunc) (StackOpData* D);
626 typedef struct OptFuncDesc OptFuncDesc;
628 const char* Name; /* Name of the replaced runtime function */
629 OptFunc Func; /* Function pointer */
630 STOP_FLAGS Flags; /* Flags */
633 static const OptFuncDesc FuncTable[] = {
634 { "staspidx", Opt_staspidx, STOP_NONE },
635 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
636 { "tosaddax", Opt_tosaddax, STOP_NONE },
637 { "tosandax", Opt_tosandax, STOP_NONE },
638 { "tosorax", Opt_tosorax, STOP_NONE },
639 { "tosxorax", Opt_tosxorax, STOP_NONE },
641 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
645 static int CmpFunc (const void* Key, const void* Func)
646 /* Compare function for bsearch */
648 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
653 static const OptFuncDesc* FindFunc (const char* Name)
654 /* Find the function with the given name. Return a pointer to the table entry
655 * or NULL if the function was not found.
658 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
663 static int CmpHarmless (const void* Key, const void* Entry)
664 /* Compare function for bsearch */
666 return strcmp (Key, *(const char**)Entry);
671 static int HarmlessCall (const char* Name)
672 /* Check if this is a call to a harmless subroutine that will not interrupt
673 * the pushax/op sequence when encountered.
676 static const char* Tab[] = {
682 void* R = bsearch (Name,
684 sizeof (Tab) / sizeof (Tab[0]),
692 /*****************************************************************************/
694 /*****************************************************************************/
698 unsigned OptStackOps (CodeSeg* S)
699 /* Optimize operations that take operands via the stack */
701 unsigned Changes = 0; /* Number of changes in one run */
702 int InSeq = 0; /* Inside a sequence */
703 unsigned Push = 0; /* Index of pushax */
704 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
708 /* Generate register info */
711 /* Look for a call to pushax followed by a call to some other function
712 * that takes its first argument on the stack, and the second argument
713 * in the primary register.
714 * It depends on the code between the two if we can handle/transform the
715 * sequence, so check this code for the following list of things:
717 * - the range must be a basic block (one entry, one exit)
718 * - there may not be accesses to local variables with unknown
719 * offsets (because we have to adjust these offsets).
720 * - no subroutine calls
723 * Since we need a zero page register later, do also check the
724 * intermediate code for zero page use.
727 while (I < CS_GetEntryCount (S)) {
729 /* Get the next entry */
730 CodeEntry* E = CS_GetEntry (S, I);
732 /* Handling depends if we're inside a sequence or not */
735 /* If we are using the stack, and we don't have "indirect Y"
736 * addressing mode, or the value of Y is unknown, or less than
737 * two, we cannot cope with this piece of code. Having an unknown
738 * value of Y means that we cannot correct the stack offset, while
739 * having an offset less than two means that the code works with
740 * the value on stack which is to be removed.
742 if ((E->Use & REG_SP) != 0 &&
743 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) ||
744 E->RI->In.RegY < 2)) {
746 /* All this stuff is not allowed in a sequence */
749 } else if (E->OPC == OP65_JSR) {
751 /* Subroutine call: Check if this is one of our functions */
752 const OptFuncDesc* F = FindFunc (E->Arg);
758 /* Check the flags */
759 if (F->Flags & STOP_A_UNUSED) {
760 /* a must be unused later */
761 if (RegAUsed (S, I+1)) {
762 /* Cannot optimize */
767 /* Determine the zero page locations to use */
769 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
770 if ((UsedRegs & REG_SREG) == REG_NONE) {
771 /* SREG is available */
773 Data.ZPHi = "sreg+1";
774 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
776 Data.ZPHi = "ptr1+1";
777 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
779 Data.ZPHi = "ptr2+1";
781 /* No registers available */
786 /* Determine if we have a basic block */
788 PreCondOk = CS_IsBasicBlock (S, Push, I);
791 /* If preconditions are ok, call the optimizer function */
794 /* Adjust stack offsets */
795 Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2);
797 /* Prepare the remainder of the data structure */
800 Data.PushIndex = Push;
801 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
802 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
804 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
806 /* Call the optimizer function */
807 Changes += F->Func (&Data);
809 /* Regenerate register info */
813 /* End of sequence */
816 } else if (strcmp (E->Arg, "pushax") == 0) {
817 /* Restart the sequence */
820 } else if (HarmlessCall (E->Arg)) {
821 /* Track zeropage register usage */
822 UsedRegs |= (E->Use | E->Chg);
824 /* A call to an unknown subroutine ends the sequence */
829 /* Other stuff: Track zeropage register usage */
830 UsedRegs |= (E->Use | E->Chg);
833 } else if (CE_IsCallTo (E, "pushax")) {
835 /* This starts a sequence */
847 /* Free the register info */
850 /* Return the number of changes made */