1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
46 /*****************************************************************************/
48 /*****************************************************************************/
52 /* Structure that holds the needed data */
53 typedef struct StackOpData StackOpData;
55 CodeSeg* Code; /* Pointer to code segment */
56 unsigned Flags; /* Flags to remember things */
57 unsigned PushIndex; /* Index of call to pushax in codeseg */
58 unsigned OpIndex; /* Index of actual operation */
59 CodeEntry* PrevEntry; /* Entry before the call to pushax */
60 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
61 CodeEntry* OpEntry; /* Pointer to entry with op */
62 CodeEntry* NextEntry; /* Entry after the op */
63 const char* ZPLo; /* Lo byte of zero page loc to use */
64 const char* ZPHi; /* Hi byte of zero page loc to use */
65 unsigned IP; /* Insertion point used by some routines */
68 /* Flags returned by DirectOp */
69 #define OP_DIRECT 0x01 /* Direct op may be used */
70 #define OP_RELOAD_Y 0x02 /* Must reload index register Y */
74 /*****************************************************************************/
76 /*****************************************************************************/
80 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
82 /* Adjust the offset for all stack accesses in the range Start to Stop, both
83 * inclusive. The function returns the number of instructions that have been
87 /* Number of inserted instructions */
88 unsigned Inserted = 0;
90 /* Walk over all entries */
94 CodeEntry* E = CS_GetEntry (S, I);
96 int NeedCorrection = 0;
97 if ((E->Use & REG_SP) != 0) {
99 /* Check for some things that should not happen */
100 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
101 CHECK (strcmp (E->Arg, "sp") == 0);
103 /* We need to correct this one */
106 } else if (CE_IsCallTo (E, "ldaxysp")) {
108 /* We need to correct this one */
113 if (NeedCorrection) {
117 /* Get the code entry before this one. If it's a LDY, adjust the
120 P = CS_GetPrevEntry (S, I);
121 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
123 /* The Y load is just before the stack access, adjust it */
124 CE_SetNumArg (P, P->Num - Offs);
128 /* Insert a new load instruction before the stack access */
129 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
130 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
131 CS_InsertEntry (S, X, I);
133 /* One more inserted entry */
137 /* Be sure to skip the stack access for the next round */
148 /* Return the number of inserted entries */
154 static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index)
155 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
156 * be adjusted by this function.
159 /* Insert the entry into the code segment */
160 CS_InsertEntry (D->Code, E, Index);
162 /* Adjust the indices if necessary */
163 if (D->PushEntry && Index <= D->PushIndex) {
166 if (D->OpEntry && Index <= D->OpIndex) {
173 static void DelEntry (StackOpData* D, unsigned Index)
174 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
175 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
178 /* Delete the entry from the code segment */
179 CS_DelEntry (D->Code, Index);
181 /* Adjust the indices if necessary */
182 if (Index < D->PushIndex) {
184 } else if (Index == D->PushIndex) {
187 if (Index < D->OpIndex) {
189 } else if (Index == D->OpIndex) {
196 static void CheckDirectOp (StackOpData* D)
197 /* Check if the given entry is a lda instruction with an addressing mode
198 * that allows us to replace it by another operation (like ora). If so, we may
199 * use this location for the or and need not save the value in the zero
203 /* We need the entry before the push */
205 CHECK ((E = D->PrevEntry) != 0);
207 if (E->OPC == OP65_LDA) {
208 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
209 /* These insns are all ok and replaceable */
210 D->Flags |= OP_DIRECT;
211 } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY) &&
212 strcmp (E->Arg, "sp") == 0) {
213 /* A load from the stack with known offset is also ok, but in this
214 * case we must reload the index register later. Please note that
215 * a load indirect via other zero page locations is not ok, since
216 * these locations may change between the push and the actual
219 D->Flags |= (OP_DIRECT | OP_RELOAD_Y);
226 static void ReplacePushByStore (StackOpData* D)
227 /* Replace the call to the push subroutine by a store into the zero page
228 * location (actually, the push is not replaced, because we need it for
229 * later, but the name is still ok since the push will get removed at the
230 * end of each routine).
235 /* Store the value into the zeropage instead of pushing it */
236 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
237 InsertEntry (D, X, D->PushIndex+1);
238 if ((D->Flags & OP_DIRECT) == 0) {
239 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
240 InsertEntry (D, X, D->PushIndex+1);
246 static void AddOpLow (StackOpData* D, opc_t OPC)
247 /* Add an op for the low byte of an operator. This function honours the
248 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
249 * All code is inserted at the current insertion point.
254 if ((D->Flags & OP_DIRECT) != 0) {
255 /* Op with a variable location. If the location is on the stack, we
256 * need to reload the Y register.
258 if ((D->Flags & OP_RELOAD_Y) != 0) {
259 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
260 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
261 InsertEntry (D, X, D->IP++);
263 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
265 /* Op with temp storage */
266 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
268 InsertEntry (D, X, D->IP++);
273 static void AddOpHigh (StackOpData* D, opc_t OPC)
274 /* Add an op for the high byte of an operator. Special cases (constant values
275 * or similar) have to be checked separately; the function covers only the
276 * generic case. Code is inserted at the insertion point.
281 /* High byte is unknown */
282 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
283 InsertEntry (D, X, D->IP++);
284 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
285 InsertEntry (D, X, D->IP++);
286 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
287 InsertEntry (D, X, D->IP++);
288 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
289 InsertEntry (D, X, D->IP++);
290 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
291 InsertEntry (D, X, D->IP++);
296 static void RemovePushAndOp (StackOpData* D)
297 /* Remove the call to pushax and the call to the operator subroutine */
299 DelEntry (D, D->OpIndex);
300 DelEntry (D, D->PushIndex);
305 static const char* IsRegVar (const StackOpData* D)
306 /* If the value pushed is that of a register variable, return the name of the
307 * entry in the register bank. Otherwise return NULL.
312 if (D->PushIndex >= 2 &&
313 (P = D->PrevEntry) != 0 &&
314 P->OPC == OP65_LDX &&
316 strncmp (P->Arg, "regbank+", 7) == 0 &&
317 isdigit (P->Arg[8]) &&
318 (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 &&
319 P->OPC == OP65_LDA &&
321 strncmp (P->Arg, "regbank+", 7) == 0 &&
322 isdigit (P->Arg[8])) {
323 /* Ok, it loads the register variable */
332 /*****************************************************************************/
333 /* Actual optimization functions */
334 /*****************************************************************************/
338 static unsigned Opt_staspidx (StackOpData* D)
339 /* Optimize the staspidx sequence if possible */
344 /* Check if we're using a register variable */
345 if ((ZPLo = IsRegVar (D)) == 0) {
347 /* Store the value into the zeropage instead of pushing it */
348 ReplacePushByStore (D);
350 /* Use the given zero page loc */
354 /* Replace the store subroutine call by a direct op */
355 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
356 InsertEntry (D, X, D->OpIndex+1);
358 /* Remove the push and the call to the staspidx function */
361 /* We changed the sequence */
367 static unsigned Opt_staxspidx (StackOpData* D)
368 /* Optimize the staxspidx sequence if possible */
373 /* Check if we're using a register variable */
374 if ((ZPLo = IsRegVar (D)) == 0) {
376 /* Store the value into the zeropage instead of pushing it */
377 ReplacePushByStore (D);
379 /* Use the given zero page loc */
383 /* Inline the store */
384 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
385 InsertEntry (D, X, D->OpIndex+1);
386 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
387 InsertEntry (D, X, D->OpIndex+2);
388 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
389 /* Value of X is known */
390 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
391 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
394 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
396 InsertEntry (D, X, D->OpIndex+3);
397 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, D->OpEntry->LI);
398 InsertEntry (D, X, D->OpIndex+4);
400 /* Remove the push and the call to the staxspidx function */
403 /* We changed the sequence */
409 static unsigned Opt_tosaddax (StackOpData* D)
410 /* Optimize the tosaddax sequence if possible */
415 /* We need the entry behind the add */
416 CHECK (D->NextEntry != 0);
418 /* Check the entry before the push. If it's a lda instruction with an
419 * addressing mode that allows us to replace it, we may use this
420 * location for the op and need not save the value in the zero page
425 /* Store the value into the zeropage instead of pushing it */
426 ReplacePushByStore (D);
429 D->IP = D->OpIndex+1;
430 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
431 InsertEntry (D, X, D->IP++);
434 AddOpLow (D, OP65_ADC);
437 if (D->PushEntry->RI->In.RegX == 0) {
438 /* The high byte is the value in X plus the carry */
439 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
440 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
441 InsertEntry (D, X, D->IP++);
442 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
443 InsertEntry (D, X, D->IP++);
444 } else if (D->OpEntry->RI->In.RegX == 0) {
445 /* The high byte is that of the first operand plus carry */
447 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
448 /* Value of first op high byte is known */
449 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
450 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
452 /* Value of first op high byte is unknown */
453 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
455 InsertEntry (D, X, D->IP++);
456 L = CS_GenLabel (D->Code, D->NextEntry);
457 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
458 InsertEntry (D, X, D->IP++);
459 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
460 InsertEntry (D, X, D->IP++);
462 /* High byte is unknown */
463 AddOpHigh (D, OP65_ADC);
466 /* Remove the push and the call to the tosaddax function */
469 /* We changed the sequence */
475 static unsigned Opt_tosandax (StackOpData* D)
476 /* Optimize the tosandax sequence if possible */
480 /* Check the entry before the push. If it's a lda instruction with an
481 * addressing mode that allows us to replace it, we may use this
482 * location for the op and need not save the value in the zero page
487 /* Store the value into the zeropage instead of pushing it */
488 ReplacePushByStore (D);
490 /* Inline the and, low byte */
491 D->IP = D->OpIndex + 1;
492 AddOpLow (D, OP65_AND);
495 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
496 /* The high byte is zero */
497 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
498 InsertEntry (D, X, D->IP++);
500 /* High byte is unknown */
501 AddOpHigh (D, OP65_AND);
504 /* Remove the push and the call to the tosandax function */
507 /* We changed the sequence */
513 static unsigned Opt_tosorax (StackOpData* D)
514 /* Optimize the tosorax sequence if possible */
518 /* Check the entry before the push. If it's a lda instruction with an
519 * addressing mode that allows us to replace it, we may use this
520 * location for the op and need not save the value in the zero page
525 /* Store the value into the zeropage instead of pushing it */
526 ReplacePushByStore (D);
528 /* Inline the or, low byte */
529 D->IP = D->OpIndex + 1;
530 AddOpLow (D, OP65_ORA);
533 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
534 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
535 /* Both values known, precalculate the result */
536 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX);
537 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
538 InsertEntry (D, X, D->IP++);
539 } else if (D->PushEntry->RI->In.RegX != 0) {
540 /* High byte is unknown */
541 AddOpHigh (D, OP65_ORA);
544 /* Remove the push and the call to the tosorax function */
547 /* We changed the sequence */
553 static unsigned Opt_tosxorax (StackOpData* D)
554 /* Optimize the tosxorax sequence if possible */
558 /* Check the entry before the push. If it's a lda instruction with an
559 * addressing mode that allows us to replace it, we may use this
560 * location for the op and need not save the value in the zero page
565 /* Store the value into the zeropage instead of pushing it */
566 ReplacePushByStore (D);
568 /* Inline the xor, low byte */
569 D->IP = D->OpIndex + 1;
570 AddOpLow (D, OP65_EOR);
573 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
574 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
575 /* Both values known, precalculate the result */
576 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
577 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
578 InsertEntry (D, X, D->IP++);
579 } else if (D->PushEntry->RI->In.RegX != 0) {
580 /* High byte is unknown */
581 AddOpHigh (D, OP65_EOR);
584 /* Remove the push and the call to the tosxorax function */
587 /* We changed the sequence */
593 /*****************************************************************************/
595 /*****************************************************************************/
599 /* Flags for the functions */
601 STOP_NONE, /* Nothing special */
602 STOP_A_UNUSED /* Call only if a unused later */
606 typedef unsigned (*OptFunc) (StackOpData* D);
607 typedef struct OptFuncDesc OptFuncDesc;
609 const char* Name; /* Name of the replaced runtime function */
610 OptFunc Func; /* Function pointer */
611 STOP_FLAGS Flags; /* Flags */
614 static const OptFuncDesc FuncTable[] = {
615 { "staspidx", Opt_staspidx, STOP_NONE },
616 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
617 { "tosaddax", Opt_tosaddax, STOP_NONE },
618 { "tosandax", Opt_tosandax, STOP_NONE },
619 { "tosorax", Opt_tosorax, STOP_NONE },
620 { "tosxorax", Opt_tosxorax, STOP_NONE },
622 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
626 static int CmpFunc (const void* Key, const void* Func)
627 /* Compare function for bsearch */
629 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
634 static const OptFuncDesc* FindFunc (const char* Name)
635 /* Find the function with the given name. Return a pointer to the table entry
636 * or NULL if the function was not found.
639 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
644 static int CmpHarmless (const void* Key, const void* Entry)
645 /* Compare function for bsearch */
647 return strcmp (Key, *(const char**)Entry);
652 static int HarmlessCall (const char* Name)
653 /* Check if this is a call to a harmless subroutine that will not interrupt
654 * the pushax/op sequence when encountered.
657 static const char* Tab[] = {
663 void* R = bsearch (Name,
665 sizeof (Tab) / sizeof (Tab[0]),
673 /*****************************************************************************/
675 /*****************************************************************************/
679 unsigned OptStackOps (CodeSeg* S)
680 /* Optimize operations that take operands via the stack */
682 unsigned Changes = 0; /* Number of changes in one run */
683 int InSeq = 0; /* Inside a sequence */
684 unsigned Push = 0; /* Index of pushax */
685 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
689 /* Generate register info */
692 /* Look for a call to pushax followed by a call to some other function
693 * that takes its first argument on the stack, and the second argument
694 * in the primary register.
695 * It depends on the code between the two if we can handle/transform the
696 * sequence, so check this code for the following list of things:
698 * - the range must be a basic block (one entry, one exit)
699 * - there may not be accesses to local variables with unknown
700 * offsets (because we have to adjust these offsets).
701 * - no subroutine calls
704 * Since we need a zero page register later, do also check the
705 * intermediate code for zero page use.
708 while (I < CS_GetEntryCount (S)) {
710 /* Get the next entry */
711 CodeEntry* E = CS_GetEntry (S, I);
713 /* Handling depends on whether we're inside a sequence or not */
716 /* If we are using the stack, and we don't have "indirect Y"
717 * addressing mode, or the value of Y is unknown, or less than
718 * two, we cannot cope with this piece of code. Having an unknown
719 * value of Y means that we cannot correct the stack offset, while
720 * having an offset less than two means that the code works with
721 * the value on stack which is to be removed.
723 if ((E->Use & REG_SP) != 0 &&
724 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) ||
725 E->RI->In.RegY < 2)) {
727 /* All this stuff is not allowed in a sequence */
730 } else if (E->OPC == OP65_JSR) {
732 /* Subroutine call: Check if this is one of our functions */
733 const OptFuncDesc* F = FindFunc (E->Arg);
739 /* Check the flags */
740 if (F->Flags & STOP_A_UNUSED) {
741 /* a must be unused later */
742 if (RegAUsed (S, I+1)) {
743 /* Cannot optimize */
748 /* Determine the zero page locations to use */
750 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
751 if ((UsedRegs & REG_SREG) == REG_NONE) {
752 /* SREG is available */
754 Data.ZPHi = "sreg+1";
755 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
757 Data.ZPHi = "ptr1+1";
758 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
760 Data.ZPHi = "ptr2+1";
762 /* No registers available */
767 /* Determine if we have a basic block */
769 PreCondOk = CS_IsBasicBlock (S, Push, I);
772 /* If preconditions are ok, call the optimizer function */
775 /* Adjust stack offsets */
776 Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2);
778 /* Prepare the remainder of the data structure */
781 Data.PushIndex = Push;
782 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
783 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
785 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
787 /* Call the optimizer function */
788 Changes += F->Func (&Data);
790 /* Regenerate register info */
794 /* End of sequence */
797 } else if (strcmp (E->Arg, "pushax") == 0) {
798 /* Restart the sequence */
801 } else if (HarmlessCall (E->Arg)) {
802 /* Track zeropage register usage */
803 UsedRegs |= (E->Use | E->Chg);
805 /* A call to an unknown subroutine ends the sequence */
810 /* Other stuff: Track zeropage register usage */
811 UsedRegs |= (E->Use | E->Chg);
814 } else if (CE_IsCallTo (E, "pushax")) {
816 /* This starts a sequence */
828 /* Free the register info */
831 /* Return the number of changes made */