1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2004 Ullrich von Bassewitz */
10 /* Römerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
48 /*****************************************************************************/
50 /*****************************************************************************/
54 /* Flags for the functions */
56 STOP_NONE = 0x00, /* Nothing special */
57 STOP_A_UNUSED = 0x01, /* Call only if a unused later */
58 STOP_A_KNOWN = 0x02, /* Call only if A is known */
59 STOP_X_ZERO = 0x04 /* Call only if X is zero */
62 /* Structure forward decl */
63 typedef struct StackOpData StackOpData;
65 /* Structure that describes an optimizer subfunction for a specific op */
66 typedef unsigned (*OptFunc) (StackOpData* D);
67 typedef struct OptFuncDesc OptFuncDesc;
69 const char* Name; /* Name of the replaced runtime function */
70 OptFunc Func; /* Function pointer */
71 STOP_FLAGS Flags; /* Flags */
74 /* Structure that holds the needed data */
76 CodeSeg* Code; /* Pointer to code segment */
77 unsigned Flags; /* Flags to remember things */
79 /* Pointer to optimizer subfunction description */
80 const OptFuncDesc* OptFunc;
82 /* ZP register usage inside the sequence */
85 /* Several indices if insns in the code segment */
86 int LoadAIndex; /* Index of load insns, -1 = invalid */
89 int PushIndex; /* Index of call to pushax in codeseg */
90 int OpIndex; /* Index of actual operation */
92 CodeEntry* PrevEntry; /* Entry before the call to pushax */
93 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
94 CodeEntry* OpEntry; /* Pointer to entry with op */
95 CodeEntry* NextEntry; /* Entry after the op */
96 const char* ZPLo; /* Lo byte of zero page loc to use */
97 const char* ZPHi; /* Hi byte of zero page loc to use */
98 unsigned IP; /* Insertion point used by some routines */
101 /* Flags returned by DirectOp */
102 #define OP_DIRECT 0x01 /* Direct op may be used */
103 #define OP_RELOAD_Y 0x02 /* Must reload index register Y */
107 /*****************************************************************************/
109 /*****************************************************************************/
113 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
114 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
115 * OpIndex is adjusted according to the insertions.
118 /* Walk over all entries */
119 int I = D->PushIndex + 1;
120 while (I < D->OpIndex) {
122 CodeEntry* E = CS_GetEntry (D->Code, I);
124 int NeedCorrection = 0;
125 if ((E->Use & REG_SP) != 0) {
127 /* Check for some things that should not happen */
128 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
129 CHECK (strcmp (E->Arg, "sp") == 0);
131 /* We need to correct this one */
134 } else if (CE_IsCallTo (E, "ldaxysp")) {
136 /* We need to correct this one */
141 if (NeedCorrection) {
143 /* Get the code entry before this one. If it's a LDY, adjust the
146 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
147 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
149 /* The Y load is just before the stack access, adjust it */
150 CE_SetNumArg (P, P->Num - Offs);
154 /* Insert a new load instruction before the stack access */
155 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
156 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
157 CS_InsertEntry (D->Code, X, I++);
159 /* One more inserted entries */
164 /* If we need the value of Y later, be sure to reload it */
165 if (RegYUsed (D->Code, I+1)) {
166 const char* Arg = MakeHexArg (E->RI->In.RegY);
167 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
168 CS_InsertEntry (D->Code, X, I+1);
170 /* One more inserted entries */
173 /* Skip this instruction in the next round */
185 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
186 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
187 * be adjusted by this function.
190 /* Insert the entry into the code segment */
191 CS_InsertEntry (D->Code, E, Index);
193 /* Adjust the indices if necessary */
194 if (D->PushEntry && Index <= D->PushIndex) {
197 if (D->OpEntry && Index <= D->OpIndex) {
204 static void DelEntry (StackOpData* D, int Index)
205 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
206 * adjusted by this function, and PushEntry/OpEntry may get invalidated.
209 /* Delete the entry from the code segment */
210 CS_DelEntry (D->Code, Index);
212 /* Adjust the indices if necessary */
213 if (Index < D->PushIndex) {
215 } else if (Index == D->PushIndex) {
218 if (Index < D->OpIndex) {
220 } else if (Index == D->OpIndex) {
227 static void CheckDirectOp (StackOpData* D)
228 /* Check if the given entry is a lda instruction with an addressing mode
229 * that allows us to replace it by another operation (like ora). If so, we may
230 * use this location for the or and must not save the value in the zero
234 /* We need the entry before the push */
236 CHECK ((E = D->PrevEntry) != 0);
238 if (E->OPC == OP65_LDA) {
239 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
240 /* These insns are all ok and replaceable */
241 D->Flags |= OP_DIRECT;
242 } else if (E->AM == AM65_ZP_INDY && RegValIsKnown (E->RI->In.RegY) &&
243 strcmp (E->Arg, "sp") == 0) {
244 /* A load from the stack with known offset is also ok, but in this
245 * case we must reload the index register later. Please note that
246 * a load indirect via other zero page locations is not ok, since
247 * these locations may change between the push and the actual
250 D->Flags |= (OP_DIRECT | OP_RELOAD_Y);
257 static void ReplacePushByStore (StackOpData* D)
258 /* Replace the call to the push subroutine by a store into the zero page
259 * location (actually, the push is not replaced, because we need it for
260 * later, but the name is still ok since the push will get removed at the
261 * end of each routine).
266 /* Store the value into the zeropage instead of pushing it */
267 X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
268 InsertEntry (D, X, D->PushIndex+1);
269 if ((D->Flags & OP_DIRECT) == 0) {
270 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
271 InsertEntry (D, X, D->PushIndex+1);
277 static void AddOpLow (StackOpData* D, opc_t OPC)
278 /* Add an op for the low byte of an operator. This function honours the
279 * OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions.
280 * All code is inserted at the current insertion point.
285 if ((D->Flags & OP_DIRECT) != 0) {
286 /* Op with a variable location. If the location is on the stack, we
287 * need to reload the Y register.
289 if ((D->Flags & OP_RELOAD_Y) != 0) {
290 const char* Arg = MakeHexArg (D->PrevEntry->RI->In.RegY);
291 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
292 InsertEntry (D, X, D->IP++);
294 X = NewCodeEntry (OPC, D->PrevEntry->AM, D->PrevEntry->Arg, 0, D->OpEntry->LI);
296 /* Op with temp storage */
297 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
299 InsertEntry (D, X, D->IP++);
304 static void AddOpHigh (StackOpData* D, opc_t OPC)
305 /* Add an op for the high byte of an operator. Special cases (constant values
306 * or similar) have to be checked separately, the function covers only the
307 * generic case. Code is inserted at the insertion point.
312 /* High byte is unknown */
313 X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
314 InsertEntry (D, X, D->IP++);
315 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
316 InsertEntry (D, X, D->IP++);
317 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
318 InsertEntry (D, X, D->IP++);
319 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
320 InsertEntry (D, X, D->IP++);
321 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
322 InsertEntry (D, X, D->IP++);
327 static void RemovePushAndOp (StackOpData* D)
328 /* Remove the call to pushax and the call to the operator subroutine */
330 DelEntry (D, D->OpIndex);
331 DelEntry (D, D->PushIndex);
336 static int IsRegVar (StackOpData* D)
337 /* If the value pushed is that of a register variable, replace ZPLo and ZPHi
338 * in the given StackOpData struct by the register variables and return true.
339 * Otherwise leave D untouched and return false.
344 if (D->PushIndex >= 2 &&
345 (P = D->PrevEntry) != 0 &&
346 P->OPC == OP65_LDX &&
348 strncmp (P->Arg, "regbank+", 7) == 0 &&
349 IsDigit (P->Arg[8]) &&
350 (P = CS_GetEntry (D->Code, D->PushIndex-2)) != 0 &&
351 P->OPC == OP65_LDA &&
353 strncmp (P->Arg, "regbank+", 7) == 0 &&
354 IsDigit (P->Arg[8])) {
355 /* Ok, it loads the register variable */
356 D->ZPHi = D->PrevEntry->Arg;
366 /*****************************************************************************/
367 /* Actual optimization functions */
368 /*****************************************************************************/
372 static unsigned Opt___bzero (StackOpData* D)
373 /* Optimize the __bzero sequence if possible */
379 /* Check if we're using a register variable */
381 /* Store the value into the zeropage instead of pushing it */
382 ReplacePushByStore (D);
385 /* If the return value of __bzero is used, we have to add code to reload
386 * a/x from the pointer variable.
388 if (RegAXUsed (D->Code, D->OpIndex+1)) {
389 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
390 InsertEntry (D, X, D->OpIndex+1);
391 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
392 InsertEntry (D, X, D->OpIndex+2);
395 /* X is always zero, A contains the size of the data area to zero.
396 * Note: A may be zero, in which case the operation is null op.
398 if (D->OpEntry->RI->In.RegA != 0) {
400 /* The value of A is known */
401 if (D->OpEntry->RI->In.RegA <= 0x81) {
403 /* Loop using the sign bit */
404 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
405 InsertEntry (D, X, D->OpIndex+1);
407 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
408 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
409 InsertEntry (D, X, D->OpIndex+2);
411 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
412 InsertEntry (D, X, D->OpIndex+3);
413 L = CS_GenLabel (D->Code, X);
415 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
416 InsertEntry (D, X, D->OpIndex+4);
418 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
419 InsertEntry (D, X, D->OpIndex+5);
423 /* Loop using an explicit compare */
424 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
425 InsertEntry (D, X, D->OpIndex+1);
427 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
428 InsertEntry (D, X, D->OpIndex+2);
430 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
431 InsertEntry (D, X, D->OpIndex+3);
432 L = CS_GenLabel (D->Code, X);
434 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
435 InsertEntry (D, X, D->OpIndex+4);
437 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
438 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
439 InsertEntry (D, X, D->OpIndex+5);
441 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
442 InsertEntry (D, X, D->OpIndex+6);
447 /* Remove the push and the call to the __bzero function */
450 /* We changed the sequence */
456 static unsigned Opt_staspidx (StackOpData* D)
457 /* Optimize the staspidx sequence if possible */
461 /* Check if we're using a register variable */
463 /* Store the value into the zeropage instead of pushing it */
464 ReplacePushByStore (D);
467 /* Replace the store subroutine call by a direct op */
468 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
469 InsertEntry (D, X, D->OpIndex+1);
471 /* Remove the push and the call to the staspidx function */
474 /* We changed the sequence */
480 static unsigned Opt_staxspidx (StackOpData* D)
481 /* Optimize the staxspidx sequence if possible */
485 /* Check if we're using a register variable */
487 /* Store the value into the zeropage instead of pushing it */
488 ReplacePushByStore (D);
491 /* Inline the store */
492 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
493 InsertEntry (D, X, D->OpIndex+1);
494 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
495 /* Value of Y is known */
496 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
497 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
499 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
501 InsertEntry (D, X, D->OpIndex+2);
502 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
503 /* Value of X is known */
504 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
505 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
508 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
510 InsertEntry (D, X, D->OpIndex+3);
511 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
512 InsertEntry (D, X, D->OpIndex+4);
514 /* Remove the push and the call to the staxspidx function */
517 /* We changed the sequence */
523 static unsigned Opt_tosaddax (StackOpData* D)
524 /* Optimize the tosaddax sequence if possible */
529 /* We need the entry behind the add */
530 CHECK (D->NextEntry != 0);
532 /* Check if the X register is known and zero when the add is done, and
533 * if the add is followed by
536 * jsr ldauidx ; or ldaidx
538 * If this is true, the addition does actually add an offset to a pointer
539 * before it is dereferenced. Since both subroutines take an offset in Y,
540 * we can pass the offset (instead of #$00) and remove the addition
543 if (D->OpEntry->RI->In.RegX == 0 &&
544 D->NextEntry->OPC == OP65_LDY &&
545 CE_IsKnownImm (D->NextEntry, 0) &&
546 !CE_HasLabel (D->NextEntry) &&
547 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
548 (CE_IsCallTo (N, "ldauidx") ||
549 CE_IsCallTo (N, "ldaidx"))) {
551 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
553 /* Store the value into the zeropage instead of pushing it */
554 ReplacePushByStore (D);
556 /* Replace the ldy by a tay. Be sure to create the new entry before
557 * deleting the ldy, since we will reference the line info from this
560 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
561 DelEntry (D, D->OpIndex + 1);
562 InsertEntry (D, X, D->OpIndex + 1);
564 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
565 * the ptr is in the zero page location, we just need to load from
566 * the pointer, and fix X in case of ldaidx.
568 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
569 DelEntry (D, D->OpIndex + 2);
570 InsertEntry (D, X, D->OpIndex + 2);
575 /* Add sign extension - N is unused now */
576 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
578 L = CS_GenLabel (D->Code, N);
580 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
581 InsertEntry (D, X, D->OpIndex + 3);
583 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
584 InsertEntry (D, X, D->OpIndex + 4);
589 /* Check the entry before the push. If it's a lda instruction with an
590 * addressing mode that allows us to replace it, we may use this
591 * location for the op and must not save the value in the zero page
596 /* Store the value into the zeropage instead of pushing it */
597 ReplacePushByStore (D);
600 D->IP = D->OpIndex+1;
601 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
602 InsertEntry (D, X, D->IP++);
605 AddOpLow (D, OP65_ADC);
608 if (D->PushEntry->RI->In.RegX == 0) {
609 /* The high byte is the value in X plus the carry */
610 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
611 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
612 InsertEntry (D, X, D->IP++);
613 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
614 InsertEntry (D, X, D->IP++);
615 } else if (D->OpEntry->RI->In.RegX == 0) {
616 /* The high byte is that of the first operand plus carry */
618 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
619 /* Value of first op high byte is known */
620 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
621 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
623 /* Value of first op high byte is unknown */
624 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
626 InsertEntry (D, X, D->IP++);
627 L = CS_GenLabel (D->Code, D->NextEntry);
628 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
629 InsertEntry (D, X, D->IP++);
630 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
631 InsertEntry (D, X, D->IP++);
633 /* High byte is unknown */
634 AddOpHigh (D, OP65_ADC);
638 /* Remove the push and the call to the tosaddax function */
641 /* We changed the sequence */
647 static unsigned Opt_tosandax (StackOpData* D)
648 /* Optimize the tosandax sequence if possible */
652 /* Check the entry before the push. If it's a lda instruction with an
653 * addressing mode that allows us to replace it, we may use this
654 * location for the op and must not save the value in the zero page
659 /* Store the value into the zeropage instead of pushing it */
660 ReplacePushByStore (D);
662 /* Inline the and, low byte */
663 D->IP = D->OpIndex + 1;
664 AddOpLow (D, OP65_AND);
667 if (D->PushEntry->RI->In.RegX == 0 || D->OpEntry->RI->In.RegX == 0) {
668 /* The high byte is zero */
669 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
670 InsertEntry (D, X, D->IP++);
672 /* High byte is unknown */
673 AddOpHigh (D, OP65_AND);
676 /* Remove the push and the call to the tosandax function */
679 /* We changed the sequence */
685 static unsigned Opt_tosorax (StackOpData* D)
686 /* Optimize the tosorax sequence if possible */
690 /* Check the entry before the push. If it's a lda instruction with an
691 * addressing mode that allows us to replace it, we may use this
692 * location for the op and must not save the value in the zero page
697 /* Store the value into the zeropage instead of pushing it */
698 ReplacePushByStore (D);
700 /* Inline the or, low byte */
701 D->IP = D->OpIndex + 1;
702 AddOpLow (D, OP65_ORA);
705 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
706 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
707 /* Both values known, precalculate the result */
708 unsigned char Result = D->PushEntry->RI->In.RegX | D->OpEntry->RI->In.RegX;
709 const char* Arg = MakeHexArg (Result);
710 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
711 InsertEntry (D, X, D->IP++);
712 } else if (D->PushEntry->RI->In.RegX != 0) {
713 /* High byte is unknown */
714 AddOpHigh (D, OP65_ORA);
717 /* Remove the push and the call to the tosorax function */
720 /* We changed the sequence */
726 static unsigned Opt_tosxorax (StackOpData* D)
727 /* Optimize the tosxorax sequence if possible */
731 /* Check the entry before the push. If it's a lda instruction with an
732 * addressing mode that allows us to replace it, we may use this
733 * location for the op and must not save the value in the zero page
738 /* Store the value into the zeropage instead of pushing it */
739 ReplacePushByStore (D);
741 /* Inline the xor, low byte */
742 D->IP = D->OpIndex + 1;
743 AddOpLow (D, OP65_EOR);
746 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
747 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
748 /* Both values known, precalculate the result */
749 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
750 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
751 InsertEntry (D, X, D->IP++);
752 } else if (D->PushEntry->RI->In.RegX != 0) {
753 /* High byte is unknown */
754 AddOpHigh (D, OP65_EOR);
757 /* Remove the push and the call to the tosandax function */
760 /* We changed the sequence */
766 /*****************************************************************************/
768 /*****************************************************************************/
772 static const OptFuncDesc FuncTable[] = {
773 { "__bzero", Opt___bzero, STOP_X_ZERO | STOP_A_KNOWN },
774 { "staspidx", Opt_staspidx, STOP_NONE },
775 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
776 { "tosaddax", Opt_tosaddax, STOP_NONE },
777 { "tosandax", Opt_tosandax, STOP_NONE },
778 { "tosorax", Opt_tosorax, STOP_NONE },
779 { "tosxorax", Opt_tosxorax, STOP_NONE },
781 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
785 static int CmpFunc (const void* Key, const void* Func)
786 /* Compare function for bsearch */
788 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
793 static const OptFuncDesc* FindFunc (const char* Name)
794 /* Find the function with the given name. Return a pointer to the table entry
795 * or NULL if the function was not found.
798 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
803 static int CmpHarmless (const void* Key, const void* Entry)
804 /* Compare function for bsearch */
806 return strcmp (Key, *(const char**)Entry);
811 static int HarmlessCall (const char* Name)
812 /* Check if this is a call to a harmless subroutine that will not interrupt
813 * the pushax/op sequence when encountered.
816 static const char* Tab[] = {
830 void* R = bsearch (Name,
832 sizeof (Tab) / sizeof (Tab[0]),
840 static void ResetStackOpData (StackOpData* Data)
841 /* Reset the given data structure */
846 Data->LoadAIndex = -1;
847 Data->LoadXIndex = -1;
848 Data->LoadYIndex = -1;
849 Data->PushIndex = -1;
852 Data->UsedRegs = REG_NONE;
857 static int PreCondOk (StackOpData* D)
858 /* Check if the preconditions for a call to the optimizer subfunction are
859 * satisfied. As a side effect, this function will also choose the zero page
863 /* Check the flags */
864 if ((D->OptFunc->Flags & STOP_A_UNUSED) != 0 &&
865 RegAUsed (D->Code, D->OpIndex+1)) {
866 /* Cannot optimize */
868 } else if ((D->OptFunc->Flags & STOP_A_KNOWN) != 0 &&
869 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
870 /* Cannot optimize */
872 } else if ((D->OptFunc->Flags & STOP_X_ZERO) != 0 &&
873 D->OpEntry->RI->In.RegX != 0) {
874 /* Cannot optimize */
878 /* Determine the zero page locations to use */
879 if ((D->UsedRegs & REG_SREG) == REG_NONE) {
882 } else if ((D->UsedRegs & REG_PTR1) == REG_NONE) {
885 } else if ((D->UsedRegs & REG_PTR2) == REG_NONE) {
889 /* No registers available */
893 /* Determine if we have a basic block */
894 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
899 /*****************************************************************************/
901 /*****************************************************************************/
905 unsigned OptStackOps (CodeSeg* S)
906 /* Optimize operations that take operands via the stack */
908 unsigned Changes = 0; /* Number of changes in one run */
919 /* Generate register info */
924 ResetStackOpData (&Data);
926 /* Look for a call to pushax followed by a call to some other function
927 * that takes it's first argument on the stack, and the second argument
928 * in the primary register.
929 * It depends on the code between the two if we can handle/transform the
930 * sequence, so check this code for the following list of things:
932 * - the range must be a basic block (one entry, one exit)
933 * - there may not be accesses to local variables with unknown
934 * offsets (because we have to adjust these offsets).
935 * - no subroutine calls
938 * Since we need a zero page register later, do also check the
939 * intermediate code for zero page use.
942 while (I < CS_GetEntryCount (S)) {
944 /* Get the next entry */
945 CodeEntry* E = CS_GetEntry (S, I);
947 /* Actions depend on state */
951 /* While searching, track register load insns, so we can tell
952 * what is in a register once pushax is encountered.
954 if (CE_IsCallTo (E, "pushax")) {
957 } else if (E->Info & OF_LOAD) {
958 if (E->Chg & REG_A) {
961 if (E->Chg & REG_X) {
964 if (E->Chg & REG_Y) {
967 } else if (E->Info & OF_XFR) {
969 case OP65_TAX: Data.LoadXIndex = Data.LoadAIndex; break;
970 case OP65_TAY: Data.LoadYIndex = Data.LoadAIndex; break;
971 case OP65_TXA: Data.LoadAIndex = Data.LoadXIndex; break;
972 case OP65_TYA: Data.LoadAIndex = Data.LoadYIndex; break;
976 if (E->Chg & REG_A) {
977 Data.LoadAIndex = -1;
979 if (E->Chg & REG_X) {
980 Data.LoadXIndex = -1;
982 if (E->Chg & REG_Y) {
983 Data.LoadYIndex = -1;
989 /* We' found a pushax before. Search for a stack op that may
990 * follow and in the meantime, track zeropage usage and check
991 * for code that will disable us from translating the sequence.
993 if (E->OPC == OP65_JSR) {
995 /* Subroutine call: Check if this is one of the functions,
996 * we're going to replace.
998 Data.OptFunc = FindFunc (E->Arg);
1000 /* Remember the op index and go on */
1005 } else if (HarmlessCall (E->Arg)) {
1006 /* Track zeropage register usage */
1007 Data.UsedRegs |= (E->Use | E->Chg);
1009 /* A call to an unkown subroutine: We need to start
1010 * over after the last pushax. Note: This will also
1011 * happen if we encounter a call to pushax!
1014 ResetStackOpData (&Data);
1019 } else if ((E->Use & REG_SP) != 0 &&
1020 (E->AM != AM65_ZP_INDY || RegValIsUnknown (E->RI->In.RegY) ||
1021 E->RI->In.RegY < 2)) {
1023 /* If we are using the stack, and we don't have "indirect Y"
1024 * addressing mode, or the value of Y is unknown, or less
1025 * than two, we cannot cope with this piece of code. Having
1026 * an unknown value of Y means that we cannot correct the
1027 * stack offset, while having an offset less than two means
1028 * that the code works with the value on stack which is to
1032 ResetStackOpData (&Data);
1037 /* Other stuff: Track zeropage register usage */
1038 Data.UsedRegs |= (E->Use | E->Chg);
1043 /* Track zero page location usage beyond this point */
1044 Data.UsedRegs |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
1046 /* Check the preconditions. If they aren't ok, reset the insn
1047 * pointer to the pushax and start over. We will loose part of
1048 * load tracking but at least a/x has probably lost between
1049 * pushax and here and will be tracked again when restarting.
1051 if (!PreCondOk (&Data)) {
1053 ResetStackOpData (&Data);
1058 /* Preconditions are ok, so call the optimizer function */
1060 /* Adjust stack offsets to account for the upcoming removal */
1061 AdjustStackOffset (&Data, 2);
1063 /* Prepare the remainder of the data structure */
1064 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
1065 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
1066 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
1067 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
1069 /* Call the optimizer function */
1070 Changes += Data.OptFunc->Func (&Data);
1072 /* Regenerate register info */
1076 ResetStackOpData (&Data);
1087 /* Free the register info */
1090 /* Return the number of changes made */