1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
46 /*****************************************************************************/
48 /*****************************************************************************/
52 /* Structure that holds the needed data */
53 typedef struct StackOpData StackOpData;
55 CodeSeg* Code; /* Pointer to code segment */
56 unsigned Flags; /* Flags to remember things */
57 unsigned PushIndex; /* Index of call to pushax in codeseg */
58 unsigned OpIndex; /* Index of actual operation */
59 CodeEntry* PrevEntry; /* Entry before the call to pushax */
60 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
61 CodeEntry* OpEntry; /* Pointer to entry with op */
62 CodeEntry* NextEntry; /* Entry after the op */
63 const char* ZPLo; /* Lo byte of zero page loc to use */
64 const char* ZPHi; /* Hi byte of zero page loc to use */
65 unsigned IP; /* Insertion point used by some routines */
68 /* Flags returned by DirectOp */
69 #define OP_DIRECT 0x01 /* Direct op may be used */
70 #define OP_RELOAD_Y 0x02 /* Must reload index register Y */
74 /*****************************************************************************/
76 /*****************************************************************************/
80 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
82 /* Adjust the offset for all stack accesses in the range Start to Stop, both
83 * inclusive. The function returns the number of instructions that have been
87 /* Number of inserted instructions */
88 unsigned Inserted = 0;
90 /* Walk over all entries */
94 CodeEntry* E = CS_GetEntry (S, I);
96 int NeedCorrection = 0;
97 if ((E->Use & REG_SP) != 0) {
99 /* Check for some things that should not happen */
100 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
101 CHECK (strcmp (E->Arg, "sp") == 0);
103 /* We need to correct this one */
106 } else if (CE_IsCallTo (E, "ldaxysp")) {
108 /* We need to correct this one */
113 if (NeedCorrection) {
117 /* Get the code entry before this one. If it's a LDY, adjust the
120 P = CS_GetPrevEntry (S, I);
121 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
123 /* The Y load is just before the stack access, adjust it */
124 CE_SetNumArg (P, P->Num - Offs);
128 /* Insert a new load instruction before the stack access */
129 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
130 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
131 CS_InsertEntry (S, X, I);
133 /* One more inserted entry */
137 /* Be sure to skip the stack access for the next round */
148 /* Return the number of inserted entries */
154 static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index)
155 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
156 * be adjusted by this function.
159 /* Insert the entry into the code segment */
160 CS_InsertEntry (D->Code, E, Index);
162 /* Adjust the indices if necessary */
163 if (D->PushEntry && Index <= D->PushIndex) {
166 if (D->OpEntry && Index <= D->OpIndex) {
173 static void DelEntry (StackOpData* D, unsigned Index)
174 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
175 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
178 /* Delete the entry from the code segment */
179 CS_DelEntry (D->Code, Index);
181 /* Adjust the indices if necessary */
182 if (Index < D->PushIndex) {
184 } else if (Index == D->PushIndex) {
187 if (Index < D->OpIndex) {
189 } else if (Index == D->OpIndex) {
196 static void CheckDirectOp (StackOpData* D)
197 /* Check if the entry before the push is a lda instruction with an addressing mode
198 * that allows us to replace it by another operation (like ora). If so, we may
199 * use this location for the or and need not save the value in the zero
203 /* We need the entry before the push */
205 CHECK ((E = D->PrevEntry) != 0);
207 if (E->OPC == OP65_LDA) {
208 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
209 /* These insns are all ok and replaceable */
210 D->Flags |= OP_DIRECT;
211 } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY) &&
212 strcmp (E->Arg, D->ZPLo) != 0 && strcmp (E->Arg, D->ZPHi) != 0) {
213 /* Load indirect with known offset is also ok, provided that
214 * the zeropage location used is not the same as the one we're
215 * using for the temp storage.
217 D->Flags |= (OP_DIRECT | OP_RELOAD_Y);
224 static void ReplacePushByStore (StackOpData* D)
225 /* Replace the call to the push subroutine by a store into the zero page
226 * location (actually, the push is not replaced, because we need it for
227 * later, but the name is still ok since the push will get removed at the
228 * end of each routine).
233 /* Store the value into the zeropage instead of pushing it */
234 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
235 InsertEntry (D, X, D->PushIndex+1);
236 if ((D->Flags & OP_DIRECT) == 0) {
237 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
238 InsertEntry (D, X, D->PushIndex+1);
244 static void AddOpLow (StackOpData* D, opc_t OPC)
245 /* Add an op for the low byte of an operator. This function honours the
246 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
247 * All code is inserted at the current insertion point.
252 if ((D->Flags & OP_DIRECT) != 0) {
253 /* Op with a variable location. If the location is on the stack, we
254 * need to reload the Y register.
256 if ((D->Flags & OP_RELOAD_Y) != 0) {
257 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
258 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
259 InsertEntry (D, X, D->IP++);
261 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
263 /* Op with temp storage */
264 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
266 InsertEntry (D, X, D->IP++);
271 static void AddOpHigh (StackOpData* D, opc_t OPC)
272 /* Add an op for the high byte of an operator. Special cases (constant values
273 * or similar) have to be checked separately; the function covers only the
274 * generic case. Code is inserted at the insertion point.
279 /* High byte is unknown */
280 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
281 InsertEntry (D, X, D->IP++);
282 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
283 InsertEntry (D, X, D->IP++);
284 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
285 InsertEntry (D, X, D->IP++);
286 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
287 InsertEntry (D, X, D->IP++);
288 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
289 InsertEntry (D, X, D->IP++);
294 static void RemovePushAndOp (StackOpData* D)
295 /* Remove the call to pushax and the call to the operator subroutine */
297 DelEntry (D, D->OpIndex);
298 DelEntry (D, D->PushIndex);
303 static const char* IsRegVar (const StackOpData* D)
304 /* If the value pushed is that of a register variable, return the name of the
305 * entry in the register bank. Otherwise return NULL. NOTE(review): the literal "regbank+" has 8 characters but both strncmp calls compare only 7, so the '+' is never verified while Arg[8] is still read (beyond the terminator if Arg is exactly "regbank") - confirm whether the length should be 8.
310 if (D->PushIndex >= 2 &&
311 (P = D->PrevEntry) != 0 &&
312 P->OPC == OP65_LDX &&
314 strncmp (P->Arg, "regbank+", 7) == 0 &&
315 isdigit (P->Arg[8]) &&
316 (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 &&
317 P->OPC == OP65_LDA &&
319 strncmp (P->Arg, "regbank+", 7) == 0 &&
320 isdigit (P->Arg[8])) {
321 /* Ok, it loads the register variable */
330 /*****************************************************************************/
331 /* Actual optimization functions */
332 /*****************************************************************************/
336 static unsigned Opt_staspidx (StackOpData* D)
337 /* Optimize the staspidx sequence if possible */
342 /* Check if we're using a register variable */
343 if ((ZPLo = IsRegVar (D)) == 0) {
345 /* Store the value into the zeropage instead of pushing it */
346 ReplacePushByStore (D);
348 /* Use the given zero page loc */
352 /* Replace the store subroutine call by a direct op */
353 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
354 InsertEntry (D, X, D->OpIndex+1);
356 /* Remove the push and the call to the staspidx function */
359 /* We changed the sequence */
365 static unsigned Opt_staxspidx (StackOpData* D)
366 /* Optimize the staxspidx sequence if possible */
371 /* Check if we're using a register variable */
372 if ((ZPLo = IsRegVar (D)) == 0) {
374 /* Store the value into the zeropage instead of pushing it */
375 ReplacePushByStore (D);
377 /* Use the given zero page loc */
381 /* Inline the store */
382 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
383 InsertEntry (D, X, D->OpIndex+1);
384 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
385 InsertEntry (D, X, D->OpIndex+2);
386 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
387 /* Value of X is known */
388 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
389 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
392 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
394 InsertEntry (D, X, D->OpIndex+3);
395 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
396 InsertEntry (D, X, D->OpIndex+4);
398 /* Remove the push and the call to the staxspidx function */
401 /* We changed the sequence */
407 static unsigned Opt_tosaddax (StackOpData* D)
408 /* Optimize the tosaddax sequence if possible */
413 /* We need the entry behind the add */
414 CHECK (D->NextEntry != 0);
416 /* Check the entry before the push. If it's a lda instruction with an
417 * addressing mode that allows us to replace it, we may use this
418 * location for the op and must not save the value in the zero page
423 /* Store the value into the zeropage instead of pushing it */
424 ReplacePushByStore (D);
427 D->IP = D->OpIndex+1;
428 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
429 InsertEntry (D, X, D->IP++);
432 AddOpLow (D, OP65_ADC);
435 if (D->PushEntry->RI->In.RegX == 0) {
436 /* The high byte is the value in X plus the carry */
437 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
438 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
439 InsertEntry (D, X, D->IP++);
440 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
441 InsertEntry (D, X, D->IP++);
442 } else if (D->OpEntry->RI->In.RegX == 0) {
443 /* The high byte is that of the first operand plus carry */
445 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
446 /* Value of first op high byte is known */
447 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
448 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
450 /* Value of first op high byte is unknown */
451 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
453 InsertEntry (D, X, D->IP++);
454 L = CS_GenLabel (D->Code, D->NextEntry);
455 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
456 InsertEntry (D, X, D->IP++);
457 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
458 InsertEntry (D, X, D->IP++);
460 /* High byte is unknown */
461 AddOpHigh (D, OP65_ADC);
464 /* Remove the push and the call to the tosaddax function */
467 /* We changed the sequence */
473 static unsigned Opt_tosandax (StackOpData* D)
474 /* Optimize the tosandax sequence if possible */
478 /* Check the entry before the push. If it's a lda instruction with an
479 * addressing mode that allows us to replace it, we may use this
480 * location for the op and must not save the value in the zero page
485 /* Store the value into the zeropage instead of pushing it */
486 ReplacePushByStore (D);
488 /* Inline the and, low byte */
489 D->IP = D->OpIndex + 1;
490 AddOpLow (D, OP65_AND);
493 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
494 /* The high byte is zero */
495 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
496 InsertEntry (D, X, D->IP++);
498 /* High byte is unknown */
499 AddOpHigh (D, OP65_AND);
502 /* Remove the push and the call to the tosandax function */
505 /* We changed the sequence */
511 static unsigned Opt_tosorax (StackOpData* D)
512 /* Optimize the tosorax sequence if possible */
516 /* Check the entry before the push. If it's a lda instruction with an
517 * addressing mode that allows us to replace it, we may use this
518 * location for the op and must not save the value in the zero page
523 /* Store the value into the zeropage instead of pushing it */
524 ReplacePushByStore (D);
526 /* Inline the or, low byte */
527 D->IP = D->OpIndex + 1;
528 AddOpLow (D, OP65_ORA);
531 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
532 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
533 /* Both values known, precalculate the result */
534 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX);
535 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
536 InsertEntry (D, X, D->IP++);
537 } else if (D->PushEntry->RI->In.RegX != 0) {
538 /* High byte is unknown */
539 AddOpHigh (D, OP65_ORA);
542 /* Remove the push and the call to the tosorax function */
545 /* We changed the sequence */
551 static unsigned Opt_tosxorax (StackOpData* D)
552 /* Optimize the tosxorax sequence if possible */
556 /* Check the entry before the push. If it's a lda instruction with an
557 * addressing mode that allows us to replace it, we may use this
558 * location for the op and must not save the value in the zero page
563 /* Store the value into the zeropage instead of pushing it */
564 ReplacePushByStore (D);
566 /* Inline the xor, low byte */
567 D->IP = D->OpIndex + 1;
568 AddOpLow (D, OP65_EOR);
571 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
572 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
573 /* Both values known, precalculate the result */
574 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
575 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
576 InsertEntry (D, X, D->IP++);
577 } else if (D->PushEntry->RI->In.RegX != 0) {
578 /* High byte is unknown */
579 AddOpHigh (D, OP65_EOR);
582 /* Remove the push and the call to the tosxorax function */
585 /* We changed the sequence */
591 /*****************************************************************************/
593 /*****************************************************************************/
597 /* Flags for the functions */
599 STOP_NONE, /* Nothing special */
600 STOP_A_UNUSED /* Call only if a unused later */
604 typedef unsigned (*OptFunc) (StackOpData* D);
605 typedef struct OptFuncDesc OptFuncDesc;
607 const char* Name; /* Name of the replaced runtime function */
608 OptFunc Func; /* Function pointer */
609 STOP_FLAGS Flags; /* Flags */
612 static const OptFuncDesc FuncTable[] = {
613 { "staspidx", Opt_staspidx, STOP_NONE },
614 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
615 { "tosaddax", Opt_tosaddax, STOP_NONE },
616 { "tosandax", Opt_tosandax, STOP_NONE },
617 { "tosorax", Opt_tosorax, STOP_NONE },
618 { "tosxorax", Opt_tosxorax, STOP_NONE },
620 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
624 static int CmpFunc (const void* Key, const void* Func)
625 /* Compare function for bsearch */
627 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
632 static const OptFuncDesc* FindFunc (const char* Name)
633 /* Find the function with the given name. Return a pointer to the table entry
634 * or NULL if the function was not found.
637 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
642 static int CmpHarmless (const void* Key, const void* Entry)
643 /* Compare function for bsearch */
645 return strcmp (Key, *(const char**)Entry);
650 static int HarmlessCall (const char* Name)
651 /* Check if this is a call to a harmless subroutine that will not interrupt
652 * the pushax/op sequence when encountered.
655 static const char* Tab[] = {
661 void* R = bsearch (Name,
663 sizeof (Tab) / sizeof (Tab[0]),
671 /*****************************************************************************/
673 /*****************************************************************************/
677 unsigned OptStackOps (CodeSeg* S)
678 /* Optimize operations that take operands via the stack */
680 unsigned Changes = 0; /* Number of changes in one run */
681 int InSeq = 0; /* Inside a sequence */
682 unsigned Push = 0; /* Index of pushax */
683 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
687 /* Generate register info */
690 /* Look for a call to pushax followed by a call to some other function
691 * that takes its first argument on the stack, and the second argument
692 * in the primary register.
693 * It depends on the code between the two if we can handle/transform the
694 * sequence, so check this code for the following list of things:
696 * - the range must be a basic block (one entry, one exit)
697 * - there may not be accesses to local variables with unknown
698 * offsets (because we have to adjust these offsets).
699 * - no subroutine calls
702 * Since we need a zero page register later, do also check the
703 * intermediate code for zero page use.
706 while (I < CS_GetEntryCount (S)) {
708 /* Get the next entry */
709 CodeEntry* E = CS_GetEntry (S, I);
711 /* Handling depends on whether we're inside a sequence or not */
714 /* If we are using the stack, and we don't have "indirect Y"
715 * addressing mode, or the value of Y is unknown, or less than
716 * two, we cannot cope with this piece of code. Having an unknown
717 * value of Y means that we cannot correct the stack offset, while
718 * having an offset less than two means that the code works with
719 * the value on stack which is to be removed.
721 if ((E->Use & REG_SP) != 0 &&
722 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) ||
723 E->RI->In.RegY < 2)) {
725 /* All this stuff is not allowed in a sequence */
728 } else if (E->OPC == OP65_JSR) {
730 /* Subroutine call: Check if this is one of our functions */
731 const OptFuncDesc* F = FindFunc (E->Arg);
737 /* Check the flags */
738 if (F->Flags & STOP_A_UNUSED) {
739 /* a must be unused later */
740 if (RegAUsed (S, I+1)) {
741 /* Cannot optimize */
746 /* Determine the zero page locations to use */
748 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
749 if ((UsedRegs & REG_SREG) == REG_NONE) {
750 /* SREG is available */
752 Data.ZPHi = "sreg+1";
753 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
755 Data.ZPHi = "ptr1+1";
756 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
758 Data.ZPHi = "ptr2+1";
760 /* No registers available */
765 /* Determine if we have a basic block */
767 PreCondOk = CS_IsBasicBlock (S, Push, I);
770 /* If preconditions are ok, call the optimizer function */
773 /* Adjust stack offsets */
774 Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2);
776 /* Prepare the remainder of the data structure */
779 Data.PushIndex = Push;
780 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
781 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
783 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
785 /* Call the optimizer function */
786 Changes += F->Func (&Data);
788 /* Regenerate register info */
792 /* End of sequence */
795 } else if (strcmp (E->Arg, "pushax") == 0) {
796 /* Restart the sequence */
799 } else if (HarmlessCall (E->Arg)) {
800 /* Track zeropage register usage */
801 UsedRegs |= (E->Use | E->Chg);
803 /* A call to an unknown subroutine ends the sequence */
808 /* Other stuff: Track zeropage register usage */
809 UsedRegs |= (E->Use | E->Chg);
812 } else if (CE_IsCallTo (E, "pushax")) {
814 /* This starts a sequence */
826 /* Free the register info */
829 /* Return the number of changes made */