1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
45 /*****************************************************************************/
47 /*****************************************************************************/
51 /* Structure that holds the needed data */
52 typedef struct StackOpData StackOpData;
54 CodeSeg* Code; /* Pointer to code segment */
55 unsigned Flags; /* Flags to remember things */
56 unsigned PushIndex; /* Index of call to pushax in codeseg */
57 unsigned OpIndex; /* Index of actual operation */
58 CodeEntry* PrevEntry; /* Entry before the call to pushax */
59 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
60 CodeEntry* OpEntry; /* Pointer to entry with op */
61 CodeEntry* NextEntry; /* Entry after the op */
62 const char* ZPLo; /* Lo byte of zero page loc to use */
63 const char* ZPHi; /* Hi byte of zero page loc to use */
64 unsigned IP; /* Insertion point used by some routines */
67 /* Flags returned by DirectOp */
68 #define OP_DIRECT 0x01 /* Direct op may be used */
69 #define OP_ONSTACK 0x02 /* Operand is on stack */
73 /*****************************************************************************/
75 /*****************************************************************************/
79 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
81 /* Adjust the offset for all stack accesses in the range Start to Stop, both
82 * inclusive. The function returns the number of instructions that have been
86 /* Number of inserted instructions */
87 unsigned Inserted = 0;
89 /* Walk over all entries */
93 CodeEntry* E = CS_GetEntry (S, I);
95 int NeedCorrection = 0;
96 if ((E->Use & REG_SP) != 0) {
98 /* Check for some things that should not happen */
99 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
100 CHECK (strcmp (E->Arg, "sp") == 0);
102 /* We need to correct this one */
105 } else if (CE_IsCallTo (E, "ldaxysp")) {
107 /* We need to correct this one */
112 if (NeedCorrection) {
116 /* Get the code entry before this one. If it's a LDY, adjust the
119 P = CS_GetPrevEntry (S, I);
120 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
122 /* The Y load is just before the stack access, adjust it */
123 CE_SetNumArg (P, P->Num - Offs);
127 /* Insert a new load instruction before the stack access */
128 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
129 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
130 CS_InsertEntry (S, X, I);
132 /* One more inserted entry */
136 /* Be sure to skip the stack access for the next round */
147 /* Return the number of inserted entries */
153 static void InsertEntry (StackOpData* D, CodeEntry* E, unsigned Index)
154 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
155 * be adjusted by this function.
158 /* Insert the entry into the code segment */
159 CS_InsertEntry (D->Code, E, Index);
161 /* Adjust the indices if necessary: an index is only touched while the
 * matching entry pointer (PushEntry/OpEntry) is still set and the insertion
 * happened at or before that index. */
162 if (D->PushEntry && Index <= D->PushIndex) {
165 if (D->OpEntry && Index <= D->OpIndex) {
172 static void DelEntry (StackOpData* D, unsigned Index)
173 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
174 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
177 /* Delete the entry from the code segment */
178 CS_DelEntry (D->Code, Index);
180 /* Adjust the indices if necessary. For each of PushIndex and OpIndex two
 * cases are distinguished: the deleted entry was located before the tracked
 * one, or it was the tracked entry itself. */
181 if (Index < D->PushIndex) {
183 } else if (Index == D->PushIndex) {
186 if (Index < D->OpIndex) {
188 } else if (Index == D->OpIndex) {
195 static void CheckDirectOp (StackOpData* D)
196 /* Check if the entry before the push (D->PrevEntry) is a lda instruction
197 * with an addressing mode that allows us to replace it by another operation
198 * (like ora). If so, we may use this location for the operation and must
 * not save the value in the zero
202 /* We need the entry before the push */
204 CHECK ((E = D->PrevEntry) != 0);
206 if (E->OPC == OP65_LDA) {
207 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
208 /* These insns are all ok and replaceable */
209 D->Flags |= OP_DIRECT;
210 } else if (E->AM == AM65_ZP_INDY &&
211 RegValIsKnown (E->RI->In.RegY) &&
212 (E->Use & REG_SP) != 0) {
213 /* Load from stack with known offset is also ok */
214 D->Flags |= (OP_DIRECT | OP_ONSTACK);
221 static void ReplacePushByStore (StackOpData* D)
222 /* Replace the call to the push subroutine by a store into the zero page
223 * location (actually, the push is not replaced, because we need it for
224 * later, but the name is still ok since the push will get removed at the
225 * end of each routine).
230 /* Store the value into the zeropage instead of pushing it */
231 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
232 InsertEntry (D, X, D->PushIndex+1);
/* With OP_DIRECT set the low byte is not saved: AddOpLow then operates on
 * the operand of the load before the push instead of on ZPLo. */
233 if ((D->Flags & OP_DIRECT) == 0) {
234 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
235 InsertEntry (D, X, D->PushIndex+1);
241 static void AddOpLow (StackOpData* D, opc_t OPC)
242 /* Add an op for the low byte of an operator. This function honours the
243 * OP_DIRECT and OP_ONSTACK flags and generates the necessary instructions.
244 * All code is inserted at the current insertion point.
249 if ((D->Flags & OP_DIRECT) != 0) {
250 /* Op with a variable location. If the location is on the stack, we
251 * need to reload the Y register.
253 if ((D->Flags & OP_ONSTACK) != 0) {
254 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
255 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
256 InsertEntry (D, X, D->IP++);
258 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
260 /* Op with temp storage */
261 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
263 InsertEntry (D, X, D->IP++);
268 static void AddOpHigh (StackOpData* D, opc_t OPC)
269 /* Add an op for the high byte of an operator. Special cases (constant values
270 * or similar) have to be checked separately, the function covers only the
271 * generic case. Code is inserted at the insertion point.
276 /* High byte is unknown */
/* Emitted sequence: save A in ZPLo, move X into A, apply OPC with ZPHi,
 * move the result back into X, then reload A from ZPLo. */
277 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
278 InsertEntry (D, X, D->IP++);
279 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
280 InsertEntry (D, X, D->IP++);
281 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
282 InsertEntry (D, X, D->IP++);
283 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
284 InsertEntry (D, X, D->IP++);
285 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
286 InsertEntry (D, X, D->IP++);
291 static void RemovePushAndOp (StackOpData* D)
292 /* Remove the call to pushax and the call to the operator subroutine */
294 DelEntry (D, D->OpIndex);
295 DelEntry (D, D->PushIndex);
300 /*****************************************************************************/
301 /* Actual optimization functions */
302 /*****************************************************************************/
306 static unsigned Opt_staspidx (StackOpData* D)
307 /* Optimize the staspidx sequence if possible */
311 /* Store the value into the zeropage instead of pushing it */
312 ReplacePushByStore (D);
314 /* Replace the store subroutine call by a direct op */
315 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
316 InsertEntry (D, X, D->OpIndex+1);
318 /* Remove the push and the call to the staspidx function */
321 /* We changed the sequence */
327 static unsigned Opt_staxspidx (StackOpData* D)
328 /* Optimize the staxspidx sequence if possible */
332 /* Store the value into the zeropage instead of pushing it */
333 ReplacePushByStore (D);
335 /* Inline the store */
336 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
337 InsertEntry (D, X, D->OpIndex+1);
338 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
339 InsertEntry (D, X, D->OpIndex+2);
340 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
341 /* Value of X is known */
342 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
343 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
346 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
348 InsertEntry (D, X, D->OpIndex+3);
349 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
350 InsertEntry (D, X, D->OpIndex+4);
352 /* Remove the push and the call to the staxspidx function */
355 /* We changed the sequence */
361 static unsigned Opt_tosaddax (StackOpData* D)
362 /* Optimize the tosaddax sequence if possible */
367 /* We need the entry behind the add */
368 CHECK (D->NextEntry != 0);
370 /* Check the entry before the push. If it's a lda instruction with an
371 * addressing mode that allows us to replace it, we may use this
372 * location for the op and must not save the value in the zero page
377 /* Store the value into the zeropage instead of pushing it */
378 ReplacePushByStore (D);
381 D->IP = D->OpIndex+1;
382 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
383 InsertEntry (D, X, D->IP++);
386 AddOpLow (D, OP65_ADC);
389 if (D->PushEntry->RI->In.RegX == 0) {
390 /* The high byte is the value in X plus the carry */
391 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
392 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
393 InsertEntry (D, X, D->IP++);
394 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
395 InsertEntry (D, X, D->IP++);
396 } else if (D->OpEntry->RI->In.RegX == 0) {
397 /* The high byte is that of the first operand plus carry */
399 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
400 /* Value of first op high byte is known */
401 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
402 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
404 /* Value of first op high byte is unknown */
405 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
407 InsertEntry (D, X, D->IP++);
408 L = CS_GenLabel (D->Code, D->NextEntry);
409 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
410 InsertEntry (D, X, D->IP++);
411 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
412 InsertEntry (D, X, D->IP++);
414 /* High byte is unknown */
415 AddOpHigh (D, OP65_ADC);
418 /* Remove the push and the call to the tosaddax function */
421 /* We changed the sequence */
427 static unsigned Opt_tosandax (StackOpData* D)
428 /* Optimize the tosandax sequence if possible */
432 /* Check the entry before the push. If it's a lda instruction with an
433 * addressing mode that allows us to replace it, we may use this
434 * location for the op and must not save the value in the zero page
439 /* Store the value into the zeropage instead of pushing it */
440 ReplacePushByStore (D);
442 /* Inline the and, low byte */
443 D->IP = D->OpIndex + 1;
444 AddOpLow (D, OP65_AND);
447 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
448 /* The high byte is zero */
449 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
450 InsertEntry (D, X, D->IP++);
452 /* High byte is unknown */
453 AddOpHigh (D, OP65_AND);
456 /* Remove the push and the call to the tosandax function */
459 /* We changed the sequence */
465 static unsigned Opt_tosorax (StackOpData* D)
466 /* Optimize the tosorax sequence if possible */
470 /* Check the entry before the push. If it's a lda instruction with an
471 * addressing mode that allows us to replace it, we may use this
472 * location for the op and must not save the value in the zero page
477 /* Store the value into the zeropage instead of pushing it */
478 ReplacePushByStore (D);
480 /* Inline the or, low byte */
481 D->IP = D->OpIndex + 1;
482 AddOpLow (D, OP65_ORA);
485 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
486 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
487 /* Both values known, precalculate the result */
488 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX);
489 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
490 InsertEntry (D, X, D->IP++);
491 } else if (D->PushEntry->RI->In.RegX != 0) {
492 /* High byte is unknown */
493 AddOpHigh (D, OP65_ORA);
/* NOTE(review): no high byte code is added when the pushed high byte is
 * known to be zero; presumably X already holds the high byte of the result
 * in that case - confirm. */
496 /* Remove the push and the call to the tosorax function */
499 /* We changed the sequence */
505 static unsigned Opt_tosxorax (StackOpData* D)
506 /* Optimize the tosxorax sequence if possible */
510 /* Check the entry before the push. If it's a lda instruction with an
511 * addressing mode that allows us to replace it, we may use this
512 * location for the op and must not save the value in the zero page
517 /* Store the value into the zeropage instead of pushing it */
518 ReplacePushByStore (D);
520 /* Inline the xor, low byte */
521 D->IP = D->OpIndex + 1;
522 AddOpLow (D, OP65_EOR);
525 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
526 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
527 /* Both values known, precalculate the result */
528 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
529 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
530 InsertEntry (D, X, D->IP++);
531 } else if (D->PushEntry->RI->In.RegX != 0) {
532 /* High byte is unknown */
533 AddOpHigh (D, OP65_EOR);
/* NOTE(review): no high byte code is added when the pushed high byte is
 * known to be zero; presumably X already holds the high byte of the result
 * in that case - confirm. */
536 /* Remove the push and the call to the tosxorax function */
539 /* We changed the sequence */
545 /*****************************************************************************/
547 /*****************************************************************************/
551 /* Flags for the functions */
553 STOP_NONE, /* Nothing special */
554 STOP_A_UNUSED /* Call only if A is unused later */
558 typedef unsigned (*OptFunc) (StackOpData* D);
559 typedef struct OptFuncDesc OptFuncDesc;
561 const char* Name; /* Name of the replaced runtime function */
562 OptFunc Func; /* Function pointer */
563 STOP_FLAGS Flags; /* Flags */
566 static const OptFuncDesc FuncTable[] = {
567 { "staspidx", Opt_staspidx, STOP_NONE },
568 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
569 { "tosaddax", Opt_tosaddax, STOP_NONE },
570 { "tosandax", Opt_tosandax, STOP_NONE },
571 { "tosorax", Opt_tosorax, STOP_NONE },
572 { "tosxorax", Opt_tosxorax, STOP_NONE },
574 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
578 static int CmpFunc (const void* Key, const void* Func)
579 /* Compare function for bsearch */
581 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
586 static const OptFuncDesc* FindFunc (const char* Name)
587 /* Find the function with the given name. Return a pointer to the table entry
588 * or NULL if the function was not found.
591 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
596 static int CmpHarmless (const void* Key, const void* Entry)
597 /* Compare function for bsearch */
599 return strcmp (Key, *(const char**)Entry);
604 static int HarmlessCall (const char* Name)
605 /* Check if this is a call to a harmless subroutine that will not interrupt
606 * the pushax/op sequence when encountered.
609 static const char* Tab[] = {
613 void* R = bsearch (Name,
615 sizeof (Tab) / sizeof (Tab[0]),
623 /*****************************************************************************/
625 /*****************************************************************************/
629 unsigned OptStackOps (CodeSeg* S)
630 /* Optimize operations that take operands via the stack */
632 unsigned Changes = 0; /* Number of changes in one run */
633 int InSeq = 0; /* Inside a sequence */
634 unsigned Push = 0; /* Index of pushax */
635 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
639 /* Generate register info */
642 /* Look for a call to pushax followed by a call to some other function
643 * that takes its first argument on the stack, and the second argument
644 * in the primary register.
645 * It depends on the code between the two if we can handle/transform the
646 * sequence, so check this code for the following list of things:
648 * - the range must be a basic block (one entry, one exit)
649 * - there may not be accesses to local variables with unknown
650 * offsets (because we have to adjust these offsets).
651 * - no subroutine calls other than those accepted by HarmlessCall
654 * Since we need a zero page register later, do also check the
655 * intermediate code for zero page use.
658 while (I < CS_GetEntryCount (S)) {
660 /* Get the next entry */
661 CodeEntry* E = CS_GetEntry (S, I);
663 /* Handling depends if we're inside a sequence or not */
666 if (((E->Use & REG_SP) != 0 &&
667 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY)))) {
669 /* All this stuff is not allowed in a sequence */
672 } else if (E->OPC == OP65_JSR) {
674 /* Subroutine call: Check if this is one of our functions */
675 const OptFuncDesc* F = FindFunc (E->Arg);
681 /* Check the flags */
682 if (F->Flags & STOP_A_UNUSED) {
683 /* A must be unused later */
684 if (RegAUsed (S, I+1)) {
685 /* Cannot optimize */
690 /* Determine the zero page locations to use */
692 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
693 if ((UsedRegs & REG_SREG) == REG_NONE) {
694 /* SREG is available */
696 Data.ZPHi = "sreg+1";
697 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
699 Data.ZPHi = "ptr1+1";
700 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
702 Data.ZPHi = "ptr2+1";
704 /* No registers available */
709 /* Determine if we have a basic block */
711 PreCondOk = CS_IsBasicBlock (S, Push, I);
714 /* If preconditions are ok, call the optimizer function */
717 /* Adjust stack offsets. The index of the op moves up by the number of
 * entries AdjustStackOffset reports as inserted. */
718 Data.OpIndex = I + AdjustStackOffset (S, Push, I, 2);
720 /* Prepare the remainder of the data structure */
723 Data.PushIndex = Push;
724 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
725 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
727 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
729 /* Call the optimizer function */
730 Changes += F->Func (&Data);
732 /* Regenerate register info */
736 /* End of sequence */
739 } else if (strcmp (E->Arg, "pushax") == 0) {
740 /* Restart the sequence */
743 } else if (!HarmlessCall (E->Arg)) {
744 /* A call to an unknown subroutine ends the sequence */
750 /* Other stuff: Track zeropage register usage */
751 UsedRegs |= (E->Use | E->Chg);
755 } else if (CE_IsCallTo (E, "pushax")) {
757 /* This starts a sequence */
769 /* Free the register info */
772 /* Return the number of changes made */