1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
46 /*****************************************************************************/
48 /*****************************************************************************/
52 /* Structure that holds the needed data */
53 typedef struct StackOpData StackOpData;
55 CodeSeg* Code; /* Pointer to code segment */
56 unsigned Flags; /* Flags to remember things */
57 unsigned PushIndex; /* Index of call to pushax in codeseg */
58 unsigned OpIndex; /* Index of actual operation */
59 CodeEntry* PrevEntry; /* Entry before the call to pushax */
60 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
61 CodeEntry* OpEntry; /* Pointer to entry with op */
62 CodeEntry* NextEntry; /* Entry after the op */
63 const char* ZPLo; /* Lo byte of zero page loc to use */
64 const char* ZPHi; /* Hi byte of zero page loc to use */
65 unsigned IP; /* Insertion point used by some routines */
68 /* Flags returned by DirectOp */
69 #define OP_DIRECT 0x01 /* Direct op may be used */
70 #define OP_RELOAD_Y 0x02 /* Must reload index register Y */
74 /*****************************************************************************/
76 /*****************************************************************************/
80 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
82 /* Adjust the offset for all stack accesses in the range Start to Stop, both
83 * inclusive. The function returns the number of instructions that have been
87 /* Number of inserted instructions */
88 unsigned Inserted = 0;
90 /* Walk over all entries */
94 CodeEntry* E = CS_GetEntry (S, I);
96 int NeedCorrection = 0;
97 if ((E->Use & REG_SP) != 0) {
99 /* Check for some things that should not happen */
100 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
101 CHECK (strcmp (E->Arg, "sp") == 0);
103 /* We need to correct this one */
106 } else if (CE_IsCallTo (E, "ldaxysp")) {
108 /* We need to correct this one */
113 if (NeedCorrection) {
117 /* Get the code entry before this one. If it's a LDY, adjust the
120 P = CS_GetPrevEntry (S, I);
121 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
123 /* The Y load is just before the stack access, adjust it */
124 CE_SetNumArg (P, P->Num - Offs);
128 /* Insert a new load instruction before the stack access */
129 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
130 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
131 CS_InsertEntry (S, X, I);
133 /* One more inserted entry */
137 /* Be sure to skip the stack access for the next round */
148 /* Return the number of inserted entries */
154 static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index)
155 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
156 * be adjusted by this function.
159 /* Insert the entry into the code segment */
160 CS_InsertEntry (D->Code, E, Index);
162 /* Adjust the indices if necessary */
163 if (D->PushEntry && Index <= D->PushIndex) {
166 if (D->OpEntry && Index <= D->OpIndex) {
173 static void DelEntry (StackOpData* D, unsigned Index)
174 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
175 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
178 /* Delete the entry from the code segment */
179 CS_DelEntry (D->Code, Index);
181 /* Adjust the indices if necessary */
182 if (Index < D->PushIndex) {
184 } else if (Index == D->PushIndex) {
187 if (Index < D->OpIndex) {
189 } else if (Index == D->OpIndex) {
196 static void CheckDirectOp (StackOpData* D)
197 /* Check if the given entry is a lda instruction with an addressing mode
198 * that allows us to replace it by another operation (like ora). If so, we may
199 * use this location for the or and need not save the value in the zero
203 /* We need the entry before the push */
205 CHECK ((E = D->PrevEntry) != 0);
207 if (E->OPC == OP65_LDA) {
208 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
209 /* These insns are all ok and replaceable */
210 D->Flags |= OP_DIRECT;
211 } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY)) {
212 /* Load indirect with known offset is also ok */
213 D->Flags |= (OP_DIRECT | OP_RELOAD_Y);
220 static void ReplacePushByStore (StackOpData* D)
221 /* Replace the call to the push subroutine by a store into the zero page
222 * location (actually, the push is not replaced, because we need it for
223 * later, but the name is still ok since the push will get removed at the
224 * end of each routine).
229 /* Store the value into the zeropage instead of pushing it */
230 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
231 InsertEntry (D, X, D->PushIndex+1);
232 if ((D->Flags & OP_DIRECT) == 0) {
233 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
234 InsertEntry (D, X, D->PushIndex+1);
240 static void AddOpLow (StackOpData* D, opc_t OPC)
241 /* Add an op for the low byte of an operator. This function honours the
242 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
243 * All code is inserted at the current insertion point.
248 if ((D->Flags & OP_DIRECT) != 0) {
249 /* Op with a variable location. If the location is on the stack, we
250 * need to reload the Y register.
252 if ((D->Flags & OP_RELOAD_Y) != 0) {
253 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
254 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
255 InsertEntry (D, X, D->IP++);
257 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
259 /* Op with temp storage */
260 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
262 InsertEntry (D, X, D->IP++);
267 static void AddOpHigh (StackOpData* D, opc_t OPC)
268 /* Add an op for the high byte of an operator. Special cases (constant values
269 * or similar) have to be checked separately; the function covers only the
270 * generic case. Code is inserted at the insertion point.
275 /* High byte is unknown */
276 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
277 InsertEntry (D, X, D->IP++);
278 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
279 InsertEntry (D, X, D->IP++);
280 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
281 InsertEntry (D, X, D->IP++);
282 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
283 InsertEntry (D, X, D->IP++);
284 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
285 InsertEntry (D, X, D->IP++);
290 static void RemovePushAndOp (StackOpData* D)
291 /* Remove the call to pushax and the call to the operator subroutine */
293 DelEntry (D, D->OpIndex);
294 DelEntry (D, D->PushIndex);
299 static const char* IsRegVar (const StackOpData* D)
300 /* If the value pushed is that of a register variable, return the name of the
301 * entry in the register bank. Otherwise return NULL.
 *
 * The check looks at the entry before the push (D->PrevEntry, which must be
 * an LDX) and at the entry at D->PushIndex-2 (which must be an LDA); both
 * arguments must pass the "regbank+" prefix test followed by a digit.
 *
 * NOTE(review): both strncmp calls below compare only 7 characters, so they
 * match the prefix "regbank" and never look at the '+' of "regbank+" (which
 * is 8 characters long), although Arg[8] is then tested as a digit. The
 * length was presumably meant to be 8 - confirm before changing.
 * NOTE(review): isdigit is handed Arg[8] without a cast; if Arg is a plain
 * char string, casting to unsigned char would be the portable form - verify.
306 if (D->PushIndex >= 2 &&
307 (P = D->PrevEntry) != 0 &&
308 P->OPC == OP65_LDX &&
310 strncmp (P->Arg, "regbank+", 7) == 0 &&
311 isdigit (P->Arg[8]) &&
312 (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 &&
313 P->OPC == OP65_LDA &&
315 strncmp (P->Arg, "regbank+", 7) == 0 &&
316 isdigit (P->Arg[8])) {
317 /* Ok, it loads the register variable */
326 /*****************************************************************************/
327 /* Actual optimization functions */
328 /*****************************************************************************/
332 static unsigned Opt_staspidx (StackOpData* D)
333 /* Optimize the staspidx sequence if possible */
338 /* Check if we're using a register variable */
339 if ((ZPLo = IsRegVar (D)) == 0) {
341 /* Store the value into the zeropage instead of pushing it */
342 ReplacePushByStore (D);
344 /* Use the given zero page loc */
348 /* Replace the store subroutine call by a direct op */
349 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
350 InsertEntry (D, X, D->OpIndex+1);
352 /* Remove the push and the call to the staspidx function */
355 /* We changed the sequence */
361 static unsigned Opt_staxspidx (StackOpData* D)
362 /* Optimize the staxspidx sequence if possible */
367 /* Check if we're using a register variable */
368 if ((ZPLo = IsRegVar (D)) == 0) {
370 /* Store the value into the zeropage instead of pushing it */
371 ReplacePushByStore (D);
373 /* Use the given zero page loc */
377 /* Inline the store */
378 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
379 InsertEntry (D, X, D->OpIndex+1);
380 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
381 InsertEntry (D, X, D->OpIndex+2);
382 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
383 /* Value of X is known */
384 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
385 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
388 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
390 InsertEntry (D, X, D->OpIndex+3);
391 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
392 InsertEntry (D, X, D->OpIndex+4);
394 /* Remove the push and the call to the staxspidx function */
397 /* We changed the sequence */
403 static unsigned Opt_tosaddax (StackOpData* D)
404 /* Optimize the tosaddax sequence if possible */
409 /* We need the entry behind the add */
410 CHECK (D->NextEntry != 0);
412 /* Check the entry before the push. If it's a lda instruction with an
413 * addressing mode that allows us to replace it, we may use this
414 * location for the op and need not save the value in the zero page
419 /* Store the value into the zeropage instead of pushing it */
420 ReplacePushByStore (D);
423 D->IP = D->OpIndex+1;
424 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
425 InsertEntry (D, X, D->IP++);
428 AddOpLow (D, OP65_ADC);
431 if (D->PushEntry->RI->In.RegX == 0) {
432 /* The high byte is the value in X plus the carry */
433 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
434 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
435 InsertEntry (D, X, D->IP++);
436 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
437 InsertEntry (D, X, D->IP++);
438 } else if (D->OpEntry->RI->In.RegX == 0) {
439 /* The high byte is that of the first operand plus carry */
441 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
442 /* Value of first op high byte is known */
443 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
444 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
446 /* Value of first op high byte is unknown */
447 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
449 InsertEntry (D, X, D->IP++);
450 L = CS_GenLabel (D->Code, D->NextEntry);
451 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
452 InsertEntry (D, X, D->IP++);
453 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
454 InsertEntry (D, X, D->IP++);
456 /* High byte is unknown */
457 AddOpHigh (D, OP65_ADC);
460 /* Remove the push and the call to the tosaddax function */
463 /* We changed the sequence */
469 static unsigned Opt_tosandax (StackOpData* D)
470 /* Optimize the tosandax sequence if possible */
474 /* Check the entry before the push. If it's a lda instruction with an
475 * addressing mode that allows us to replace it, we may use this
476 * location for the op and need not save the value in the zero page
481 /* Store the value into the zeropage instead of pushing it */
482 ReplacePushByStore (D);
484 /* Inline the and, low byte */
485 D->IP = D->OpIndex + 1;
486 AddOpLow (D, OP65_AND);
489 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
490 /* The high byte is zero */
491 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
492 InsertEntry (D, X, D->IP++);
494 /* High byte is unknown */
495 AddOpHigh (D, OP65_AND);
498 /* Remove the push and the call to the tosandax function */
501 /* We changed the sequence */
507 static unsigned Opt_tosorax (StackOpData* D)
508 /* Optimize the tosorax sequence if possible */
512 /* Check the entry before the push. If it's a lda instruction with an
513 * addressing mode that allows us to replace it, we may use this
514 * location for the op and need not save the value in the zero page
519 /* Store the value into the zeropage instead of pushing it */
520 ReplacePushByStore (D);
522 /* Inline the or, low byte */
523 D->IP = D->OpIndex + 1;
524 AddOpLow (D, OP65_ORA);
527 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
528 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
529 /* Both values known, precalculate the result */
530 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX);
531 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
532 InsertEntry (D, X, D->IP++);
533 } else if (D->PushEntry->RI->In.RegX != 0) {
534 /* High byte is unknown */
535 AddOpHigh (D, OP65_ORA);
538 /* Remove the push and the call to the tosorax function */
541 /* We changed the sequence */
547 static unsigned Opt_tosxorax (StackOpData* D)
548 /* Optimize the tosxorax sequence if possible */
552 /* Check the entry before the push. If it's a lda instruction with an
553 * addressing mode that allows us to replace it, we may use this
554 * location for the op and need not save the value in the zero page
559 /* Store the value into the zeropage instead of pushing it */
560 ReplacePushByStore (D);
562 /* Inline the xor, low byte */
563 D->IP = D->OpIndex + 1;
564 AddOpLow (D, OP65_EOR);
567 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
568 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
569 /* Both values known, precalculate the result */
570 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
571 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
572 InsertEntry (D, X, D->IP++);
573 } else if (D->PushEntry->RI->In.RegX != 0) {
574 /* High byte is unknown */
575 AddOpHigh (D, OP65_EOR);
578 /* Remove the push and the call to the tosxorax function */
581 /* We changed the sequence */
587 /*****************************************************************************/
589 /*****************************************************************************/
593 /* Flags for the functions */
595 STOP_NONE, /* Nothing special */
596 STOP_A_UNUSED /* Call only if A is unused later */
600 typedef unsigned (*OptFunc) (StackOpData* D);
601 typedef struct OptFuncDesc OptFuncDesc;
603 const char* Name; /* Name of the replaced runtime function */
604 OptFunc Func; /* Function pointer */
605 STOP_FLAGS Flags; /* Flags */
/* Table mapping the name of a runtime function to the routine that optimizes
 * a pushax/<function> sequence, plus flags for extra preconditions. FindFunc
 * searches this table with bsearch using strcmp on Name (see CmpFunc), so the
 * entries must be kept sorted by Name in strcmp order.
 */
608 static const OptFuncDesc FuncTable[] = {
609 { "staspidx", Opt_staspidx, STOP_NONE },
610 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
611 { "tosaddax", Opt_tosaddax, STOP_NONE },
612 { "tosandax", Opt_tosandax, STOP_NONE },
613 { "tosorax", Opt_tosorax, STOP_NONE },
614 { "tosxorax", Opt_tosxorax, STOP_NONE },
616 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
620 static int CmpFunc (const void* Key, const void* Func)
621 /* Compare function for bsearch */
623 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
628 static const OptFuncDesc* FindFunc (const char* Name)
629 /* Find the function with the given name. Return a pointer to the table entry
630 * or NULL if the function was not found.
633 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
638 static int CmpHarmless (const void* Key, const void* Entry)
639 /* Compare function for bsearch */
641 return strcmp (Key, *(const char**)Entry);
646 static int HarmlessCall (const char* Name)
647 /* Check if this is a call to a harmless subroutine that will not interrupt
648 * the pushax/op sequence when encountered.
651 static const char* Tab[] = {
656 void* R = bsearch (Name,
658 sizeof (Tab) / sizeof (Tab[0]),
666 /*****************************************************************************/
668 /*****************************************************************************/
672 unsigned OptStackOps (CodeSeg* S)
673 /* Optimize operations that take operands via the stack */
675 unsigned Changes = 0; /* Number of changes in one run */
676 int InSeq = 0; /* Inside a sequence */
677 unsigned Push = 0; /* Index of pushax */
678 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
682 /* Generate register info */
685 /* Look for a call to pushax followed by a call to some other function
686 * that takes its first argument on the stack, and the second argument
687 * in the primary register.
688 * It depends on the code between the two if we can handle/transform the
689 * sequence, so check this code for the following list of things:
691 * - the range must be a basic block (one entry, one exit)
692 * - there may not be accesses to local variables with unknown
693 * offsets (because we have to adjust these offsets).
694 * - no subroutine calls
697 * Since we need a zero page register later, do also check the
698 * intermediate code for zero page use.
701 while (I < CS_GetEntryCount (S)) {
703 /* Get the next entry */
704 CodeEntry* E = CS_GetEntry (S, I);
706 /* Handling depends on whether we're inside a sequence or not */
709 if (((E->Use & REG_SP) != 0 &&
710 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY)))) {
712 /* All this stuff is not allowed in a sequence */
715 } else if (E->OPC == OP65_JSR) {
717 /* Subroutine call: Check if this is one of our functions */
718 const OptFuncDesc* F = FindFunc (E->Arg);
724 /* Check the flags */
725 if (F->Flags & STOP_A_UNUSED) {
726 /* A must be unused later */
727 if (RegAUsed (S, I+1)) {
728 /* Cannot optimize */
733 /* Determine the zero page locations to use */
735 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
736 if ((UsedRegs & REG_SREG) == REG_NONE) {
737 /* SREG is available */
739 Data.ZPHi = "sreg+1";
740 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
742 Data.ZPHi = "ptr1+1";
743 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
745 Data.ZPHi = "ptr2+1";
747 /* No registers available */
752 /* Determine if we have a basic block */
754 PreCondOk = CS_IsBasicBlock (S, Push, I);
757 /* If preconditions are ok, call the optimizer function */
760 /* Adjust stack offsets */
761 Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2);
763 /* Prepare the remainder of the data structure */
766 Data.PushIndex = Push;
767 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
768 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
770 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
772 /* Call the optimizer function */
773 Changes += F->Func (&Data);
775 /* Regenerate register info */
779 /* End of sequence */
782 } else if (strcmp (E->Arg, "pushax") == 0) {
783 /* Restart the sequence */
786 } else if (HarmlessCall (E->Arg)) {
787 /* Track zeropage register usage */
788 UsedRegs |= (E->Use | E->Chg);
790 /* A call to an unknown subroutine ends the sequence */
795 /* Other stuff: Track zeropage register usage */
796 UsedRegs |= (E->Use | E->Chg);
799 } else if (CE_IsCallTo (E, "pushax")) {
801 /* This starts a sequence */
813 /* Free the register info */
816 /* Return the number of changes made */