1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2019, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
49 /*****************************************************************************/
50 /* Load tracking data */
51 /*****************************************************************************/
55 /* LoadRegInfo flags set by DirectOp. The values are distinct bits, so
** several flags may be combined in one LI_FLAGS value.
** NOTE(review): no DirectOp function is visible in this part of the file;
** here the flags are set by TrackLoads, HonourUseAndChg, AddOpLow, AddOpHigh
** and some of the Opt_ functions - confirm whether the reference is stale.
*/
58 LI_DIRECT = 0x01, /* Direct op may be used */
59 LI_RELOAD_Y = 0x02, /* Reload index register Y (data is on the stack) */
60 LI_REMOVE = 0x04, /* Load may be removed */
61 LI_DONT_REMOVE = 0x08, /* Load may not be removed, overrides LI_REMOVE */
62 LI_DUP_LOAD = 0x10, /* Duplicate load */
65 /* Structure that tells us how to load a register value. It is used for
** the operands on both sides (lhs and rhs) of the operation.
*/
66 typedef struct LoadRegInfo LoadRegInfo;
68 LI_FLAGS Flags; /* Tells us how to load (LI_xxx flags) */
69 int LoadIndex; /* Index of load insn, -1 if invalid */
70 CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */
71 int XferIndex; /* Index of transfer insn, negative if none */
72 CodeEntry* XferEntry; /* The actual transfer entry */
73 int Offs; /* Stack offset if data is on stack */
76 /* Now combined for all three registers (A, X and Y) */
77 typedef struct LoadInfo LoadInfo;
79 LoadRegInfo A; /* Info for A register */
80 LoadRegInfo X; /* Info for X register */
81 LoadRegInfo Y; /* Info for Y register */
86 /*****************************************************************************/
88 /*****************************************************************************/
92 /* Flags for the functions. The xxx_LOAD_DIRECT values contain the bit of
** the matching xxx_LOAD flag plus one additional bit, so testing for
** xxx_LOAD also succeeds when xxx_LOAD_DIRECT is set.
*/
94 OP_NONE = 0x00, /* Nothing special */
95 OP_A_KNOWN = 0x01, /* Value of A must be known */
96 OP_X_ZERO = 0x02, /* X must be zero */
97 OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */
98 OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */
99 OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */
100 OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */
103 /* Structure forward decl */
104 typedef struct StackOpData StackOpData;
106 /* Structure that describes an optimizer subfunction for a specific op.
** An optimizer subfunction receives the StackOpData for the sequence and
** returns an unsigned value.
*/
107 typedef unsigned (*OptFunc) (StackOpData* D);
108 typedef struct OptFuncDesc OptFuncDesc;
110 const char* Name; /* Name of the replaced runtime function */
111 OptFunc Func; /* Function pointer */
112 unsigned UnusedRegs; /* Regs that must not be used later */
113 OP_FLAGS Flags; /* Flags (OP_xxx) */
116 /* Structure that holds the data needed while one push/op sequence is
** being optimized
*/
118 CodeSeg* Code; /* Pointer to code segment */
119 unsigned Flags; /* Flags to remember things */
121 /* Pointer to optimizer subfunction description */
122 const OptFuncDesc* OptFunc;
124 /* ZP register usage inside the sequence */
127 /* Register load information for lhs and rhs */
131 /* Several indices of insns in the code segment */
132 int PushIndex; /* Index of call to pushax in codeseg */
133 int OpIndex; /* Index of actual operation */
135 /* Pointers to insns in the code segment */
136 CodeEntry* PrevEntry; /* Entry before the call to pushax */
137 CodeEntry* PushEntry; /* Pointer to entry with call to pushax */
138 CodeEntry* OpEntry; /* Pointer to entry with op */
139 CodeEntry* NextEntry; /* Entry after the op */
141 const char* ZPLo; /* Lo byte of zero page loc to use */
142 const char* ZPHi; /* Hi byte of zero page loc to use */
143 unsigned IP; /* Insertion point (code index) used by some routines */
148 /*****************************************************************************/
149 /* Load tracking code */
150 /*****************************************************************************/
154 static void ClearLoadRegInfo (LoadRegInfo* RI)
155 /* Clear a LoadRegInfo struct. RI describes a single register; ClearLoadInfo
** calls this function once for each of A, X and Y.
*/
165 static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S)
166 /* Prepare a LoadRegInfo struct for use: for each of LoadIndex and XferIndex
** that is not negative, the matching code entry is fetched from the code
** segment S and stored in LoadEntry and XferEntry respectively.
*/
168 /* Get the entries */
169 if (RI->LoadIndex >= 0) {
170 RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex);
174 if (RI->XferIndex >= 0) {
175 RI->XferEntry = CS_GetEntry (S, RI->XferIndex);
183 static void ClearLoadInfo (LoadInfo* LI)
184 /* Clear a LoadInfo struct, that is, the load info of all three registers
** A, X and Y
*/
186 ClearLoadRegInfo (&LI->A);
187 ClearLoadRegInfo (&LI->X);
188 ClearLoadRegInfo (&LI->Y);
193 static void AdjustLoadRegInfo (LoadRegInfo* RI, int Index, int Change)
194 /* Adjust a load register info struct after deleting or inserting an entry
195 ** with a given index. Change must be either 1 or -1, this is verified by
** a CHECK. InsertEntry passes 1, DelEntry passes -1.
198 CHECK (abs (Change) == 1);
201 if (Index < RI->LoadIndex) {
203 } else if (Index == RI->LoadIndex) {
204 /* The load insn itself has been removed */
208 if (Index < RI->XferIndex) {
210 } else if (Index == RI->XferIndex) {
211 /* The transfer insn itself has been removed */
217 if (Index <= RI->LoadIndex) {
220 if (Index <= RI->XferIndex) {
228 static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S)
229 /* Prepare a LoadInfo struct for use by finalizing the load info of each
** of the registers A, X and Y against the code segment S
*/
231 /* Get the entries for all three registers */
232 FinalizeLoadRegInfo (&LI->A, S);
233 FinalizeLoadRegInfo (&LI->X, S);
234 FinalizeLoadRegInfo (&LI->Y, S);
239 static void AdjustLoadInfo (LoadInfo* LI, int Index, int Change)
240 /* Adjust a load info struct after deleting or inserting an entry with a
** given index. Index and Change are passed unchanged to AdjustLoadRegInfo
** for each of the registers A, X and Y.
*/
242 AdjustLoadRegInfo (&LI->A, Index, Change);
243 AdjustLoadRegInfo (&LI->X, Index, Change);
244 AdjustLoadRegInfo (&LI->Y, Index, Change);
249 static void HonourUseAndChg (LoadRegInfo* RI, unsigned Reg, const CodeEntry* E)
250 /* Honour use and change flags for an instruction. RI is the load info for
** the register with the mask Reg. Depending on E, RI is either cleared
** or - if E uses the register while a load insn is tracked - flagged with
** LI_DONT_REMOVE, which keeps RemoveRegLoads from deleting that load.
*/
253 ClearLoadRegInfo (RI);
254 } else if ((E->Use & Reg) && RI->LoadIndex >= 0) {
255 RI->Flags |= LI_DONT_REMOVE;
261 static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I)
262 /* Track loads for a code entry. Three kinds of insns get special handling:
** load insns (OF_LOAD), register transfers (OF_XFR) and calls to ldaxysp
** with a known value in the Y register.
*/
264 if (E->Info & OF_LOAD) {
268 /* Determine which register was loaded */
269 if (E->Chg & REG_A) {
271 } else if (E->Chg & REG_X) {
273 } else if (E->Chg & REG_Y) {
278 /* If we had a load or xfer op before, this is a duplicate load which
279 ** can cause problems if it is encountered between the pushax and the op,
282 if (RI->LoadIndex >= 0 || RI->XferIndex >= 0) {
283 RI->Flags |= LI_DUP_LOAD;
286 /* Remember the load */
291 RI->Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
292 if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) {
293 /* These insns are all ok and replaceable */
294 RI->Flags |= LI_DIRECT;
295 } else if (E->AM == AM65_ZP_INDY &&
296 RegValIsKnown (E->RI->In.RegY) &&
297 strcmp (E->Arg, "sp") == 0) {
298 /* A load from the stack with known offset is also ok, but in this
299 ** case we must reload the index register later. Please note that
300 ** a load indirect via other zero page locations is not ok, since
301 ** these locations may change between the push and the actual
304 RI->Offs = (unsigned char) E->RI->In.RegY;
305 RI->Flags |= (LI_DIRECT | LI_RELOAD_Y);
309 } else if (E->Info & OF_XFR) {
311 /* Determine source and target of the transfer and handle the TSX insn */
315 case OP65_TAX: Src = &LI->A; Tgt = &LI->X; break;
316 case OP65_TAY: Src = &LI->A; Tgt = &LI->Y; break;
317 case OP65_TXA: Src = &LI->X; Tgt = &LI->A; break;
318 case OP65_TYA: Src = &LI->Y; Tgt = &LI->A; break;
319 case OP65_TSX: ClearLoadRegInfo (&LI->X); return;
320 case OP65_TXS: return;
321 default: Internal ("Unknown XFR insn in TrackLoads");
324 /* If we had a load or xfer op before, this is a duplicate load which
325 ** can cause problems if it is encountered between the pushax and the op,
328 if (Tgt->LoadIndex >= 0 || Tgt->XferIndex >= 0) {
329 Tgt->Flags |= LI_DUP_LOAD;
332 /* Transfer the data from the source to the target register info */
333 Tgt->LoadIndex = Src->LoadIndex;
335 Tgt->Offs = Src->Offs;
336 Tgt->Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
337 Tgt->Flags |= Src->Flags & (LI_DIRECT | LI_RELOAD_Y);
339 } else if (CE_IsCallTo (E, "ldaxysp") && RegValIsKnown (E->RI->In.RegY)) {
341 /* If we had a load or xfer op before, this is a duplicate load which
342 ** can cause problems if it is encountered between the pushax and the op,
343 ** so remember it for both registers involved.
345 if (LI->A.LoadIndex >= 0 || LI->A.XferIndex >= 0) {
346 LI->A.Flags |= LI_DUP_LOAD;
348 if (LI->X.LoadIndex >= 0 || LI->X.XferIndex >= 0) {
349 LI->X.Flags |= LI_DUP_LOAD;
352 /* Both registers set, Y changed. The offset recorded for A is one less
** than the offset recorded for X.
*/
354 LI->A.XferIndex = -1;
355 LI->A.Flags |= (LI_DIRECT | LI_RELOAD_Y);
356 LI->A.Offs = (unsigned char) E->RI->In.RegY - 1;
359 LI->X.XferIndex = -1;
360 LI->X.Flags |= (LI_DIRECT | LI_RELOAD_Y);
361 LI->X.Offs = (unsigned char) E->RI->In.RegY;
363 ClearLoadRegInfo (&LI->Y);
365 HonourUseAndChg (&LI->A, REG_A, E);
366 HonourUseAndChg (&LI->X, REG_X, E);
367 HonourUseAndChg (&LI->Y, REG_Y, E);
373 /*****************************************************************************/
375 /*****************************************************************************/
379 static void InsertEntry (StackOpData* D, CodeEntry* E, int Index)
380 /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will
381 ** be adjusted by this function. The load info for lhs and rhs is adjusted
** as well.
384 /* Insert the entry into the code segment */
385 CS_InsertEntry (D->Code, E, Index);
387 /* Adjust register loads if necessary (a change of 1 means insertion) */
388 AdjustLoadInfo (&D->Lhs, Index, 1);
389 AdjustLoadInfo (&D->Rhs, Index, 1);
391 /* Adjust the indices if necessary */
392 if (D->PushEntry && Index <= D->PushIndex) {
395 if (D->OpEntry && Index <= D->OpIndex) {
402 static void DelEntry (StackOpData* D, int Index)
403 /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be
404 ** adjusted by this function, and PushEntry/OpEntry may get invalidated.
** The load info for lhs and rhs is adjusted as well.
407 /* Delete the entry from the code segment */
408 CS_DelEntry (D->Code, Index);
410 /* Adjust register loads if necessary (a change of -1 means deletion) */
411 AdjustLoadInfo (&D->Lhs, Index, -1);
412 AdjustLoadInfo (&D->Rhs, Index, -1);
414 /* Adjust the other indices if necessary */
415 if (Index < D->PushIndex) {
417 } else if (Index == D->PushIndex) {
420 if (Index < D->OpIndex) {
422 } else if (Index == D->OpIndex) {
429 static void AdjustStackOffset (StackOpData* D, unsigned Offs)
430 /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex.
431 ** OpIndex is adjusted according to the insertions. Offs is the amount that
** is subtracted from the stack offsets.
434 /* Walk over all entries between the push and the op */
435 int I = D->PushIndex + 1;
436 while (I < D->OpIndex) {
438 CodeEntry* E = CS_GetEntry (D->Code, I);
440 /* Check if this entry does a stack access, and if so, if it's a plain
441 ** load from stack, since this is needed later.
444 if ((E->Use & REG_SP) != 0) {
446 /* Check for some things that should not happen */
447 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
448 CHECK (strcmp (E->Arg, "sp") == 0);
449 /* We need to correct this one. A correction value of 2 marks a load
** of the A register, which gets special handling further down.
*/
450 Correction = (E->OPC == OP65_LDA)? 2 : 1;
452 } else if (CE_IsCallTo (E, "ldaxysp")) {
453 /* We need to correct this one */
458 /* Get the code entry before this one. If it's a LDY, adjust the
461 CodeEntry* P = CS_GetPrevEntry (D->Code, I);
462 if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) {
463 /* The Y load is just before the stack access, adjust it */
464 CE_SetNumArg (P, P->Num - Offs);
466 /* Insert a new load instruction before the stack access */
467 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
468 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
469 InsertEntry (D, X, I++);
472 /* If we need the value of Y later, be sure to reload it */
473 if (RegYUsed (D->Code, I+1)) {
475 const char* Arg = MakeHexArg (E->RI->In.RegY);
476 if (Correction == 2 && (N = CS_GetNextEntry(D->Code, I)) != 0 &&
477 ((N->Info & OF_ZBRA) != 0) && N->JumpTo != 0) {
478 /* The Y register is used but the load instruction loads A
479 ** and is followed by a branch that evaluates the zero flag.
480 ** This means that we cannot just insert the load insn
481 ** for the Y register at this place, because it would
482 ** destroy the Z flag. Instead place load insns at the
483 ** target of the branch and after it.
484 ** Note: There is a chance that this code won't work. The
485 ** jump may be a backwards jump (in which case the stack
486 ** offset has already been adjusted) or there may be other
487 ** instructions between the load and the conditional jump.
488 ** Currently the compiler does not generate such code, but
489 ** it is possible to force the optimizer into something
490 ** invalid by use of inline assembler.
493 /* Add load insn after the branch */
494 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
495 InsertEntry (D, X, I+2);
497 /* Add load insn before branch target */
498 CodeEntry* Y = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
499 int J = CS_GetEntryIndex (D->Code, N->JumpTo->Owner);
500 CHECK (J > I); /* Must not happen */
501 InsertEntry (D, Y, J);
503 /* Move the label to the new insn */
504 CodeLabel* L = CS_GenLabel (D->Code, Y);
505 CS_MoveLabelRef (D->Code, N, L);
507 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
508 InsertEntry (D, X, I+1);
509 /* Skip this instruction in the next round */
519 /* If we have rhs load insns that load from stack, we'll have to adjust
520 ** the offsets for these also.
522 if (D->Rhs.A.Flags & LI_RELOAD_Y) {
523 D->Rhs.A.Offs -= Offs;
525 if (D->Rhs.X.Flags & LI_RELOAD_Y) {
526 D->Rhs.X.Offs -= Offs;
532 static void AddStoreA (StackOpData* D)
533 /* Add a store of the A register to the zero page location D->ZPLo directly
** after the push insn. The line info is taken from the push insn.
*/
535 CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI);
536 InsertEntry (D, X, D->PushIndex+1);
541 static void AddStoreX (StackOpData* D)
542 /* Add a store of the X register to the zero page location D->ZPHi directly
** after the push insn. The line info is taken from the push insn.
*/
544 CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI);
545 InsertEntry (D, X, D->PushIndex+1);
550 static void ReplacePushByStore (StackOpData* D)
551 /* Replace the call to the push subroutine by a store into the zero page
552 ** location (actually, the push is not replaced, because we need it for
553 ** later, but the name is still ok since the push will get removed at the
554 ** end of each routine). The decision is made separately for the high and
** the low byte, based on the LI_DIRECT flag of the lhs load info.
557 /* Store the value into the zeropage instead of pushing it. Check high
558 ** byte first so that the store is later in A/X order.
560 if ((D->Lhs.X.Flags & LI_DIRECT) == 0) {
563 if ((D->Lhs.A.Flags & LI_DIRECT) == 0) {
570 static void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI)
571 /* Add an op for the low byte of an operator. This function honours the
572 ** LI_DIRECT and LI_RELOAD_Y flags and generates the necessary instructions.
573 ** All code is inserted at the current insertion point (D->IP), which is
** advanced past each inserted insn.
578 if ((LI->A.Flags & LI_DIRECT) != 0) {
579 /* Op with a variable location. If the location is on the stack, we
580 ** need to reload the Y register.
582 if ((LI->A.Flags & LI_RELOAD_Y) == 0) {
585 CodeEntry* LoadA = LI->A.LoadEntry;
586 X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
587 InsertEntry (D, X, D->IP++);
592 const char* Arg = MakeHexArg (LI->A.Offs);
593 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
594 InsertEntry (D, X, D->IP++);
597 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
598 InsertEntry (D, X, D->IP++);
602 /* In both cases, we can remove the load */
603 LI->A.Flags |= LI_REMOVE;
607 /* Op with temp storage (the zero page location D->ZPLo) */
608 X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
609 InsertEntry (D, X, D->IP++);
616 static void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult)
617 /* Add an op for the high byte of an operator. Special cases (constant values
618 ** or similar) have to be checked separately, the function covers only the
619 ** generic case. Code is inserted at the insertion point (D->IP), which is
** advanced past each inserted insn.
626 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI);
627 InsertEntry (D, X, D->IP++);
631 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
632 InsertEntry (D, X, D->IP++);
634 if ((LI->X.Flags & LI_DIRECT) != 0) {
636 if ((LI->X.Flags & LI_RELOAD_Y) == 0) {
639 CodeEntry* LoadX = LI->X.LoadEntry;
640 X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
641 InsertEntry (D, X, D->IP++);
646 const char* Arg = MakeHexArg (LI->X.Offs);
647 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
648 InsertEntry (D, X, D->IP++);
651 X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp", 0, D->OpEntry->LI);
652 InsertEntry (D, X, D->IP++);
655 /* In both cases, we can remove the load */
656 LI->X.Flags |= LI_REMOVE;
660 X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
661 InsertEntry (D, X, D->IP++);
666 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI);
667 InsertEntry (D, X, D->IP++);
670 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI);
671 InsertEntry (D, X, D->IP++);
677 static void RemoveRegLoads (StackOpData* D, LoadInfo* LI)
678 /* Remove register load insns for the A and X registers. A load is only
** deleted if it is flagged LI_REMOVE and not flagged LI_DONT_REMOVE. A
** transfer insn recorded for the register is deleted as well.
*/
680 /* Both registers may be loaded with one insn, but DelEntry will in this
681 ** case clear the other one.
683 if ((LI->A.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) {
684 if (LI->A.LoadIndex >= 0) {
685 DelEntry (D, LI->A.LoadIndex);
687 if (LI->A.XferIndex >= 0) {
688 DelEntry (D, LI->A.XferIndex);
691 if ((LI->X.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) {
692 if (LI->X.LoadIndex >= 0) {
693 DelEntry (D, LI->X.LoadIndex);
695 if (LI->X.XferIndex >= 0) {
696 DelEntry (D, LI->X.XferIndex);
703 static void RemoveRemainders (StackOpData* D)
704 /* Remove the code that is unnecessary after translation of the sequence:
** the removable register loads for lhs and rhs, the operator call at
** D->OpIndex and the push at D->PushIndex.
*/
706 /* Remove the register loads for lhs and rhs */
707 RemoveRegLoads (D, &D->Lhs);
708 RemoveRegLoads (D, &D->Rhs);
710 /* Remove the push and the operator routine */
711 DelEntry (D, D->OpIndex);
712 DelEntry (D, D->PushIndex);
717 static int IsRegVar (StackOpData* D)
718 /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi
719 ** in the given StackOpData struct by the variable and return true. Otherwise
720 ** leave D untouched and return false.
723 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
724 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
727 /* Must have both load insns */
728 if (LoadA == 0 || LoadX == 0) {
732 /* Must be loads from zp */
733 if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) {
737 /* Must be the same zp loc with high byte in X: the argument of the X
** load must be the argument of the A load with +1 appended
*/
738 Len = strlen (LoadA->Arg);
739 if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 ||
740 strcmp (LoadX->Arg + Len, "+1") != 0) {
744 /* Use the zero page location directly */
745 D->ZPLo = LoadA->Arg;
746 D->ZPHi = LoadX->Arg;
752 /*****************************************************************************/
753 /* Actual optimization functions */
754 /*****************************************************************************/
758 static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer)
759 /* Optimize the toseqax and tosneax sequences. BoolTransformer is the name
** of the routine that is called after the inlined compare.
*/
764 /* Create a call to the boolean transformer function and a label for this
765 ** insn. This is needed for all variants. Other insns are inserted *before*
768 X = NewCodeEntry (OP65_JSR, AM65_ABS, BoolTransformer, 0, D->OpEntry->LI);
769 InsertEntry (D, X, D->OpIndex + 1);
770 L = CS_GenLabel (D->Code, X);
772 /* If the lhs is direct (but not stack relative), encode compares with lhs
773 ** effectively reversing the order (which doesn't matter for == and !=).
775 if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
776 (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
778 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
779 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
781 D->IP = D->OpIndex+1;
784 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
785 InsertEntry (D, X, D->IP++);
788 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
789 InsertEntry (D, X, D->IP++);
792 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
793 InsertEntry (D, X, D->IP++);
795 /* Lhs load entries can be removed */
796 D->Lhs.X.Flags |= LI_REMOVE;
797 D->Lhs.A.Flags |= LI_REMOVE;
799 } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
800 (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
802 CodeEntry* LoadX = D->Rhs.X.LoadEntry;
803 CodeEntry* LoadA = D->Rhs.A.LoadEntry;
805 D->IP = D->OpIndex+1;
808 X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
809 InsertEntry (D, X, D->IP++);
812 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
813 InsertEntry (D, X, D->IP++);
816 X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
817 InsertEntry (D, X, D->IP++);
819 /* Rhs load entries can be removed */
820 D->Rhs.X.Flags |= LI_REMOVE;
821 D->Rhs.A.Flags |= LI_REMOVE;
823 } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 &&
824 (D->Rhs.X.Flags & LI_DIRECT) != 0) {
826 D->IP = D->OpIndex+1;
828 /* Add operand for low byte */
829 AddOpLow (D, OP65_CMP, &D->Rhs);
832 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
833 InsertEntry (D, X, D->IP++);
835 /* Add operand for high byte */
836 AddOpHigh (D, OP65_CMP, &D->Rhs, 0);
840 /* Save lhs into zeropage, then compare */
844 D->IP = D->OpIndex+1;
847 X = NewCodeEntry (OP65_CPX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
848 InsertEntry (D, X, D->IP++);
851 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
852 InsertEntry (D, X, D->IP++);
855 X = NewCodeEntry (OP65_CMP, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
856 InsertEntry (D, X, D->IP++);
860 /* Remove the push and the call to the toseqax/tosneax function */
861 RemoveRemainders (D);
863 /* We changed the sequence */
869 static unsigned Opt_tosshift (StackOpData* D, const char* Name)
870 /* Optimize shift sequences. Name is the name of the routine that is called
** in place of the original shift function (the wrappers in this file pass
** aslaxy and asraxy).
*/
874 /* Store the value into the zeropage instead of pushing it */
875 ReplacePushByStore (D);
877 /* If the lhs is direct (but not stack relative), we can just reload the
880 if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT &&
881 (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) {
883 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
884 CodeEntry* LoadA = D->Lhs.A.LoadEntry;
886 /* Inline the shift */
887 D->IP = D->OpIndex+1;
890 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI);
891 InsertEntry (D, X, D->IP++);
894 X = NewCodeEntry (OP65_LDA, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI);
895 InsertEntry (D, X, D->IP++);
898 X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
899 InsertEntry (D, X, D->IP++);
901 /* Lhs load entries can be removed */
902 D->Lhs.X.Flags |= LI_REMOVE;
903 D->Lhs.A.Flags |= LI_REMOVE;
907 /* Save lhs into zeropage and reload later */
911 /* Be sure to set up IP after adding the stores, otherwise it will get
914 D->IP = D->OpIndex+1;
917 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI);
918 InsertEntry (D, X, D->IP++);
921 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
922 InsertEntry (D, X, D->IP++);
925 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
926 InsertEntry (D, X, D->IP++);
930 /* jsr shlaxy/aslaxy/whatever */
931 X = NewCodeEntry (OP65_JSR, AM65_ABS, Name, 0, D->OpEntry->LI);
932 InsertEntry (D, X, D->IP++);
934 /* Remove the push and the call to the shift function */
935 RemoveRemainders (D);
937 /* We changed the sequence */
943 static unsigned Opt___bzero (StackOpData* D)
944 /* Optimize the __bzero sequence by replacing the call with an inline loop
** that stores zero via the pointer in D->ZPLo. For sizes up to $81 the loop
** counts Y down and uses the sign bit, otherwise it counts Y up and uses
** an explicit compare.
*/
950 /* Check if we're using a register variable */
952 /* Store the value into the zeropage instead of pushing it */
957 /* If the return value of __bzero is used, we have to add code to reload
958 ** a/x from the pointer variable.
960 if (RegAXUsed (D->Code, D->OpIndex+1)) {
961 X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI);
962 InsertEntry (D, X, D->OpIndex+1);
963 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
964 InsertEntry (D, X, D->OpIndex+2);
967 /* X is always zero, A contains the size of the data area to zero.
968 ** Note: A may be zero, in which case the operation is a null op.
970 if (D->OpEntry->RI->In.RegA != 0) {
973 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
974 InsertEntry (D, X, D->OpIndex+1);
976 /* The value of A is known */
977 if (D->OpEntry->RI->In.RegA <= 0x81) {
979 /* Loop using the sign bit */
982 Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1);
983 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
984 InsertEntry (D, X, D->OpIndex+2);
987 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
988 InsertEntry (D, X, D->OpIndex+3);
989 L = CS_GenLabel (D->Code, X);
992 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI);
993 InsertEntry (D, X, D->OpIndex+4);
996 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI);
997 InsertEntry (D, X, D->OpIndex+5);
1001 /* Loop using an explicit compare */
1004 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
1005 InsertEntry (D, X, D->OpIndex+2);
1008 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1009 InsertEntry (D, X, D->OpIndex+3);
1010 L = CS_GenLabel (D->Code, X);
1013 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
1014 InsertEntry (D, X, D->OpIndex+4);
1017 Arg = MakeHexArg (D->OpEntry->RI->In.RegA);
1018 X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI);
1019 InsertEntry (D, X, D->OpIndex+5);
1022 X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI);
1023 InsertEntry (D, X, D->OpIndex+6);
1028 /* Remove the push and the call to the __bzero function */
1029 RemoveRemainders (D);
1031 /* We changed the sequence */
1037 static unsigned Opt_staspidx (StackOpData* D)
1038 /* Optimize the staspidx sequence by replacing the subroutine call with a
** store of A, indirect indexed by Y, via the pointer in D->ZPLo
*/
1042 /* Check if we're using a register variable */
1043 if (!IsRegVar (D)) {
1044 /* Store the value into the zeropage instead of pushing it */
1049 /* Replace the store subroutine call by a direct op */
1050 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1051 InsertEntry (D, X, D->OpIndex+1);
1053 /* Remove the push and the call to the staspidx function */
1054 RemoveRemainders (D);
1056 /* We changed the sequence */
1062 static unsigned Opt_staxspidx (StackOpData* D)
1063 /* Optimize the staxspidx sequence by storing both bytes inline via the
** pointer in D->ZPLo: first A at index Y, then the high byte from X at
** index Y+1. Finally Y is loaded with zero.
*/
1067 /* Check if we're using a register variable */
1068 if (!IsRegVar (D)) {
1069 /* Store the value into the zeropage instead of pushing it */
1074 /* Inline the store */
1077 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1078 InsertEntry (D, X, D->OpIndex+1);
1080 if (RegValIsKnown (D->OpEntry->RI->In.RegY)) {
1081 /* Value of Y is known, load the incremented value directly */
1082 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1);
1083 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI);
1085 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI);
1087 InsertEntry (D, X, D->OpIndex+2);
1089 if (RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1090 /* Value of X is known, load it into A as an immediate value */
1091 const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX);
1092 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI);
1095 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI);
1097 InsertEntry (D, X, D->OpIndex+3);
1100 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI);
1101 InsertEntry (D, X, D->OpIndex+4);
1103 /* If we remove staxspidx, we must restore the Y register to what the
1104 ** function would return.
1106 X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00", 0, D->OpEntry->LI);
1107 InsertEntry (D, X, D->OpIndex+5);
1109 /* Remove the push and the call to the staxspidx function */
1110 RemoveRemainders (D);
1112 /* We changed the sequence */
1118 static unsigned Opt_tosaddax (StackOpData* D)
1119 /* Optimize the tosaddax sequence. Two cases are handled: an addition that
** only computes a pointer for a following call to ldauidx or ldaidx, and
** the generic case where the 16 bit addition is inlined.
*/
1124 /* We need the entry behind the add */
1125 CHECK (D->NextEntry != 0);
1127 /* Check if the X register is known and zero when the add is done, and
1128 ** if the add is followed by
1131 ** jsr ldauidx ; or ldaidx
1133 ** If this is true, the addition does actually add an offset to a pointer
1134 ** before it is dereferenced. Since both subroutines take an offset in Y,
1135 ** we can pass the offset (instead of #$00) and remove the addition
1138 if (D->OpEntry->RI->In.RegX == 0 &&
1139 D->NextEntry->OPC == OP65_LDY &&
1140 CE_IsKnownImm (D->NextEntry, 0) &&
1141 !CE_HasLabel (D->NextEntry) &&
1142 (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 &&
1143 (CE_IsCallTo (N, "ldauidx") ||
1144 CE_IsCallTo (N, "ldaidx"))) {
1146 int Signed = (strcmp (N->Arg, "ldaidx") == 0);
1148 /* Store the value into the zeropage instead of pushing it */
1152 /* Replace the ldy by a tay. Be sure to create the new entry before
1153 ** deleting the ldy, since we will reference the line info from this
1156 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI);
1157 DelEntry (D, D->OpIndex + 1);
1158 InsertEntry (D, X, D->OpIndex + 1);
1160 /* Replace the call to ldaidx/ldauidx. Since X is already zero, and
1161 ** the ptr is in the zero page location, we just need to load from
1162 ** the pointer, and fix X in case of ldaidx.
1164 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI);
1165 DelEntry (D, D->OpIndex + 2);
1166 InsertEntry (D, X, D->OpIndex + 2);
1171 /* Add sign extension - N is unused now */
1172 N = CS_GetNextEntry (D->Code, D->OpIndex + 2);
1174 L = CS_GenLabel (D->Code, N);
1176 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI);
1177 InsertEntry (D, X, D->OpIndex + 3);
1179 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI);
1180 InsertEntry (D, X, D->OpIndex + 4);
1185 /* Store the value into the zeropage instead of pushing it */
1186 ReplacePushByStore (D);
1188 /* Inline the add */
1189 D->IP = D->OpIndex+1;
1192 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI);
1193 InsertEntry (D, X, D->IP++);
1196 AddOpLow (D, OP65_ADC, &D->Lhs);
1199 if (D->PushEntry->RI->In.RegX == 0) {
1201 /* The high byte is the value in X plus the carry */
1202 CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry);
1205 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1206 InsertEntry (D, X, D->IP++);
1209 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1210 InsertEntry (D, X, D->IP++);
1212 } else if (D->OpEntry->RI->In.RegX == 0 &&
1213 (RegValIsKnown (D->PushEntry->RI->In.RegX) ||
1214 (D->Lhs.X.Flags & LI_RELOAD_Y) == 0)) {
1216 /* The high byte is that of the first operand plus carry */
1218 if (RegValIsKnown (D->PushEntry->RI->In.RegX)) {
1219 /* Value of first op high byte is known */
1220 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX);
1221 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1223 /* Value of first op high byte is unknown. Load from ZP or
1224 ** original storage.
1226 if (D->Lhs.X.Flags & LI_DIRECT) {
1227 CodeEntry* LoadX = D->Lhs.X.LoadEntry;
1228 X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI);
1230 X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI);
1233 InsertEntry (D, X, D->IP++);
1236 L = CS_GenLabel (D->Code, D->NextEntry);
1237 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1238 InsertEntry (D, X, D->IP++);
1241 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI);
1242 InsertEntry (D, X, D->IP++);
1244 /* High byte is unknown, use the generic code and keep the result */
1245 AddOpHigh (D, OP65_ADC, &D->Lhs, 1);
1249 /* Remove the push and the call to the tosaddax function */
1250 RemoveRemainders (D);
1252 /* We changed the sequence */
1258 static unsigned Opt_tosandax (StackOpData* D)
1259 /* Optimize the tosandax sequence by inlining an AND of the low and the
** high byte with the lhs operand
*/
1261 /* Store the value into the zeropage instead of pushing it */
1262 ReplacePushByStore (D);
1264 /* Inline the and, low byte. New code goes directly behind the op. */
1265 D->IP = D->OpIndex + 1;
1266 AddOpLow (D, OP65_AND, &D->Lhs);
1269 AddOpHigh (D, OP65_AND, &D->Lhs, 1);
1271 /* Remove the push and the call to the tosandax function */
1272 RemoveRemainders (D);
1274 /* We changed the sequence */
1280 static unsigned Opt_tosaslax (StackOpData* D)
1281 /* Optimize the tosaslax sequence. All work is delegated to Opt_tosshift,
** which is called with the string "aslaxy"; its result is returned as is.
*/
1283 return Opt_tosshift (D, "aslaxy");
1288 static unsigned Opt_tosasrax (StackOpData* D)
1289 /* Optimize the tosasrax sequence. All work is delegated to Opt_tosshift,
** which is called with the string "asraxy"; its result is returned as is.
*/
1291 return Opt_tosshift (D, "asraxy");
1296 static unsigned Opt_toseqax (StackOpData* D)
1297 /* Optimize the toseqax sequence. All work is delegated to
** Opt_toseqax_tosneax, which is called with the string "booleq"; its
** result is returned as is.
*/
1299 return Opt_toseqax_tosneax (D, "booleq");
1304 static unsigned Opt_tosgeax (StackOpData* D)
1305 /* Optimize the tosgeax sequence by replacing the call with inline code.
** A cmp (low byte) and an sbc (high byte) are added using the rhs load
** info, followed by these explicitly created instructions:
**
**      bvs     L
**      eor     #$80
** L:   asl     a
**      lda     #$00
**      ldx     #$00
**      rol     a
**
** The asl shifts bit 7 of A into the carry and the rol rotates the carry
** into bit 0 of A after A and X have been loaded with zero.
** NOTE(review): some source lines are not visible in this chunk; the order
** shown above assumes D->IP is corrected after the bvs insertion - confirm.
*/
1310 /* Inline the sbc */
1311 D->IP = D->OpIndex+1;
1313 /* Must be true because of OP_RHS_LOAD_DIRECT */
1314 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1316 /* Add code for low operand */
1317 AddOpLow (D, OP65_CMP, &D->Rhs);
1319 /* Add code for high operand */
1320 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* eor #$80 */
1323 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1324 InsertEntry (D, X, D->IP++);
/* asl a, this insn also gets the label that is the target of the branch */
1327 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1328 InsertEntry (D, X, D->IP++);
1329 L = CS_GenLabel (D->Code, X);
1331 /* Insert a bvs L before the eor insn */
1332 X = NewCodeEntry (OP65_BVS, AM65_BRA, L->Name, L, D->OpEntry->LI);
1333 InsertEntry (D, X, D->IP - 2);
/* lda #$00 */
1337 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1338 InsertEntry (D, X, D->IP++);
/* ldx #$00 */
1341 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1342 InsertEntry (D, X, D->IP++);
/* rol a */
1345 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1346 InsertEntry (D, X, D->IP++);
1348 /* Remove the push and the call to the tosgeax function */
1349 RemoveRemainders (D);
1351 /* We changed the sequence */
1357 static unsigned Opt_tosltax (StackOpData* D)
1358 /* Optimize the tosltax sequence by replacing the call with inline code.
** A cmp (low byte) and an sbc (high byte) are added using the rhs load
** info, followed by these explicitly created instructions:
**
**      bvc     L
**      eor     #$80
** L:   asl     a
**      lda     #$00
**      ldx     #$00
**      rol     a
**
** The asl shifts bit 7 of A into the carry and the rol rotates the carry
** into bit 0 of A after A and X have been loaded with zero.
** NOTE(review): some source lines are not visible in this chunk; the order
** shown above assumes D->IP is corrected after the bvc insertion - confirm.
*/
1364 /* Inline the compare */
1365 D->IP = D->OpIndex+1;
1367 /* Must be true because of OP_RHS_LOAD_DIRECT */
1368 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1370 /* Add code for low operand */
1371 AddOpLow (D, OP65_CMP, &D->Rhs);
1373 /* Add code for high operand */
1374 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* eor #$80 */
1377 X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80", 0, D->OpEntry->LI);
1378 InsertEntry (D, X, D->IP++);
/* asl a, this insn also gets the label that is the target of the branch */
1381 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, D->OpEntry->LI);
1382 InsertEntry (D, X, D->IP++);
1383 L = CS_GenLabel (D->Code, X);
1385 /* Insert a bvc L before the eor insn */
1386 X = NewCodeEntry (OP65_BVC, AM65_BRA, L->Name, L, D->OpEntry->LI);
1387 InsertEntry (D, X, D->IP - 2);
/* lda #$00 */
1391 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1392 InsertEntry (D, X, D->IP++);
/* ldx #$00 */
1395 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1396 InsertEntry (D, X, D->IP++);
/* rol a */
1399 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1400 InsertEntry (D, X, D->IP++);
1402 /* Remove the push and the call to the tosltax function */
1403 RemoveRemainders (D);
1405 /* We changed the sequence */
1411 static unsigned Opt_tosneax (StackOpData* D)
1412 /* Optimize the tosneax sequence. All work is delegated to
** Opt_toseqax_tosneax, which is called with the string "boolne"; its
** result is returned as is.
*/
1414 return Opt_toseqax_tosneax (D, "boolne");
1419 static unsigned Opt_tosorax (StackOpData* D)
1420 /* Optimize the tosorax sequence by inlining the OR operation.
** The pushed value is stored into the zero page instead of being pushed,
** the ORA is then added for the low and for the high byte using the lhs
** load info in D->Lhs, and finally the push and the call to tosorax are
** removed.
*/
1422 /* Store the value into the zeropage instead of pushing it */
1423 ReplacePushByStore (D);
1425 /* Inline the or, low byte. D->IP is set to the slot behind the operation */
1426 D->IP = D->OpIndex + 1;
1427 AddOpLow (D, OP65_ORA, &D->Lhs);
/* Inline the or, high byte */
1430 AddOpHigh (D, OP65_ORA, &D->Lhs, 1);
1432 /* Remove the push and the call to the tosorax function */
1433 RemoveRemainders (D);
1435 /* We changed the sequence */
1441 static unsigned Opt_tosshlax (StackOpData* D)
1442 /* Optimize the tosshlax sequence. All work is delegated to Opt_tosshift,
** which is called with the string "shlaxy"; its result is returned as is.
*/
1444 return Opt_tosshift (D, "shlaxy");
1449 static unsigned Opt_tosshrax (StackOpData* D)
1450 /* Optimize the tosshrax sequence. All work is delegated to Opt_tosshift,
** which is called with the string "shraxy"; its result is returned as is.
*/
1452 return Opt_tosshift (D, "shraxy");
1457 static unsigned Opt_tossubax (StackOpData* D)
1458 /* Optimize the tossubax sequence. Note: subtraction is not commutative!
** A sec is inserted behind the operation, followed by an sbc for the low
** and for the high byte that is generated from the rhs load info in
** D->Rhs. This requires that both rhs registers can be loaded directly,
** which the FuncTable entry requests via OP_RHS_LOAD_DIRECT.
*/
1463 /* Inline the sbc */
1464 D->IP = D->OpIndex+1;
/* sec */
1467 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1468 InsertEntry (D, X, D->IP++);
1470 /* Must be true because of OP_RHS_LOAD_DIRECT */
1471 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1473 /* Add code for low operand */
1474 AddOpLow (D, OP65_SBC, &D->Rhs);
1476 /* Add code for high operand */
1477 AddOpHigh (D, OP65_SBC, &D->Rhs, 1);
1479 /* Remove the push and the call to the tossubax function */
1480 RemoveRemainders (D);
1482 /* We changed the sequence */
1488 static unsigned Opt_tosugeax (StackOpData* D)
1489 /* Optimize the tosugeax sequence by replacing the call with inline code.
** A cmp (low byte) and an sbc (high byte) are added using the rhs load
** info, followed by
**
**      lda     #$00
**      ldx     #$00
**      rol     a
**
** so the carry is rotated into bit 0 of A while all other bits of A and
** all of X are zero.
*/
1494 /* Inline the sbc */
1495 D->IP = D->OpIndex+1;
1497 /* Must be true because of OP_RHS_LOAD_DIRECT */
1498 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1500 /* Add code for low operand */
1501 AddOpLow (D, OP65_CMP, &D->Rhs);
1503 /* Add code for high operand */
1504 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* lda #$00 */
1507 X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00", 0, D->OpEntry->LI);
1508 InsertEntry (D, X, D->IP++);
/* ldx #$00 */
1511 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, D->OpEntry->LI);
1512 InsertEntry (D, X, D->IP++);
/* rol a */
1515 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, D->OpEntry->LI);
1516 InsertEntry (D, X, D->IP++);
1518 /* Remove the push and the call to the tosugeax function */
1519 RemoveRemainders (D);
1521 /* We changed the sequence */
1527 static unsigned Opt_tosugtax (StackOpData* D)
1528 /* Optimize the tosugtax sequence by replacing the call with inline code.
** The inserted code is a sec, an sbc for the low byte, a store of the low
** byte result to tmp1, an sbc for the high byte, an ora with tmp1 and a
** call to boolugt. Both sbc operations use the rhs load info in D->Rhs.
*/
1533 /* Inline the sbc */
1534 D->IP = D->OpIndex+1;
1536 /* Must be true because of OP_RHS_LOAD_DIRECT */
1537 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
/* sec */
1540 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1541 InsertEntry (D, X, D->IP++);
1543 /* Add code for low operand */
1544 AddOpLow (D, OP65_SBC, &D->Rhs);
1546 /* We need the zero flag, so remember the intermediate result */
1547 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1548 InsertEntry (D, X, D->IP++);
1550 /* Add code for high operand */
1551 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* ora tmp1: combine the high byte result with the saved low byte result */
1554 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1555 InsertEntry (D, X, D->IP++);
1557 /* Transform to boolean */
1558 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolugt", 0, D->OpEntry->LI);
1559 InsertEntry (D, X, D->IP++);
1561 /* Remove the push and the call to the operator function */
1562 RemoveRemainders (D);
1564 /* We changed the sequence */
1570 static unsigned Opt_tosuleax (StackOpData* D)
1571 /* Optimize the tosuleax sequence by replacing the call with inline code.
** The inserted code is a sec, an sbc for the low byte, a store of the low
** byte result to tmp1, an sbc for the high byte, an ora with tmp1 and a
** call to boolule. Both sbc operations use the rhs load info in D->Rhs.
*/
1576 /* Inline the sbc */
1577 D->IP = D->OpIndex+1;
1579 /* Must be true because of OP_RHS_LOAD_DIRECT */
1580 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
/* sec */
1583 X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI);
1584 InsertEntry (D, X, D->IP++);
1586 /* Add code for low operand */
1587 AddOpLow (D, OP65_SBC, &D->Rhs);
1589 /* We need the zero flag, so remember the intermediate result */
1590 X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1591 InsertEntry (D, X, D->IP++);
1593 /* Add code for high operand */
1594 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
/* ora tmp1: combine the high byte result with the saved low byte result */
1597 X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1", 0, D->OpEntry->LI);
1598 InsertEntry (D, X, D->IP++);
1600 /* Transform to boolean */
1601 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolule", 0, D->OpEntry->LI);
1602 InsertEntry (D, X, D->IP++);
1604 /* Remove the push and the call to the operator function */
1605 RemoveRemainders (D);
1607 /* We changed the sequence */
1613 static unsigned Opt_tosultax (StackOpData* D)
1614 /* Optimize the tosultax sequence by replacing the call with inline code.
** A cmp (low byte) and an sbc (high byte) are added using the rhs load
** info in D->Rhs, followed by a call to boolult that transforms the
** outcome into a boolean.
*/
1619 /* Inline the sbc */
1620 D->IP = D->OpIndex+1;
1622 /* Must be true because of OP_RHS_LOAD_DIRECT */
1623 CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0);
1625 /* Add code for low operand */
1626 AddOpLow (D, OP65_CMP, &D->Rhs);
1628 /* Add code for high operand */
1629 AddOpHigh (D, OP65_SBC, &D->Rhs, 0);
1631 /* Transform to boolean */
1632 X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolult", 0, D->OpEntry->LI);
1633 InsertEntry (D, X, D->IP++);
1635 /* Remove the push and the call to the operator function */
1636 RemoveRemainders (D);
1638 /* We changed the sequence */
1644 static unsigned Opt_tosxorax (StackOpData* D)
1645 /* Optimize the tosxorax sequence by inlining the XOR operation.
** The pushed value is stored into the zero page instead of being pushed
** and the EOR for the low byte is added using the lhs load info. For the
** high byte, the result is loaded as an immediate value if the contents
** of X are known at the push and at the operation; otherwise the EOR is
** added for the high byte provided the pushed X is not known to be zero.
*/
1650 /* Store the value into the zeropage instead of pushing it */
1651 ReplacePushByStore (D);
1653 /* Inline the xor, low byte */
1654 D->IP = D->OpIndex + 1;
1655 AddOpLow (D, OP65_EOR, &D->Lhs);
/* Inline the xor, high byte */
1658 if (RegValIsKnown (D->PushEntry->RI->In.RegX) &&
1659 RegValIsKnown (D->OpEntry->RI->In.RegX)) {
1660 /* Both values known, precalculate the result */
1661 const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX);
1662 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI);
1663 InsertEntry (D, X, D->IP++);
1664 } else if (D->PushEntry->RI->In.RegX != 0) {
1665 /* Not both high bytes are known and the pushed one is not zero */
1666 AddOpHigh (D, OP65_EOR, &D->Lhs, 1);
1669 /* Remove the push and the call to the tosxorax function */
1670 RemoveRemainders (D);
1672 /* We changed the sequence */
1678 /*****************************************************************************/
1680 /*****************************************************************************/
/* Table of the runtime functions handled by this optimizer step. FindFunc
** looks up entries with bsearch and CmpFunc compares the names with strcmp,
** so the entries must stay sorted by name in strcmp order. Judging from the
** uses in this file, each entry consists of the function name, the optimizer
** function to call, the registers checked via UnusedRegs in PreCondOk and
** the OP_xxx precondition flags that are also checked in PreCondOk.
*/
1684 static const OptFuncDesc FuncTable[] = {
1685 { "__bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN },
1686 { "staspidx", Opt_staspidx, REG_NONE, OP_NONE },
1687 { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE },
1688 { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE },
1689 { "tosandax", Opt_tosandax, REG_NONE, OP_NONE },
1690 { "tosaslax", Opt_tosaslax, REG_NONE, OP_NONE },
1691 { "tosasrax", Opt_tosasrax, REG_NONE, OP_NONE },
1692 { "toseqax", Opt_toseqax, REG_NONE, OP_NONE },
1693 { "tosgeax", Opt_tosgeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1694 { "tosltax", Opt_tosltax, REG_NONE, OP_RHS_LOAD_DIRECT },
1695 { "tosneax", Opt_tosneax, REG_NONE, OP_NONE },
1696 { "tosorax", Opt_tosorax, REG_NONE, OP_NONE },
1697 { "tosshlax", Opt_tosshlax, REG_NONE, OP_NONE },
1698 { "tosshrax", Opt_tosshrax, REG_NONE, OP_NONE },
1699 { "tossubax", Opt_tossubax, REG_NONE, OP_RHS_LOAD_DIRECT },
1700 { "tosugeax", Opt_tosugeax, REG_NONE, OP_RHS_LOAD_DIRECT },
1701 { "tosugtax", Opt_tosugtax, REG_NONE, OP_RHS_LOAD_DIRECT },
1702 { "tosuleax", Opt_tosuleax, REG_NONE, OP_RHS_LOAD_DIRECT },
1703 { "tosultax", Opt_tosultax, REG_NONE, OP_RHS_LOAD_DIRECT },
1704 { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE },
/* Number of entries in FuncTable */
1706 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
1710 static int CmpFunc (const void* Key, const void* Func)
1711 /* Compare function for bsearch. Key is the function name that is searched
** for, Func points to an OptFuncDesc entry. The result is that of strcmp
** applied to Key and the Name of the entry.
*/
1713 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
1718 static const OptFuncDesc* FindFunc (const char* Name)
1719 /* Find the function with the given name. Return a pointer to the table entry
1720 ** or NULL if the function was not found. The lookup is a binary search in
** FuncTable using CmpFunc, so the table has to be sorted by name.
1723 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
1728 static int CmpHarmless (const void* Key, const void* Entry)
1729 /* Compare function for bsearch. Key is the name that is searched for, Entry
** points to a table element that is itself a pointer to a string. The
** result is that of strcmp applied to Key and the string from the table.
*/
1731 return strcmp (Key, *(const char**)Entry);
1736 static int HarmlessCall (const char* Name)
1737 /* Check if this is a call to a harmless subroutine that will not interrupt
1738 ** the pushax/op sequence when encountered. Name is looked up in the local
** table Tab by means of bsearch.
1741 static const char* const Tab[] = {
1787 void* R = bsearch (Name,
1789 sizeof (Tab) / sizeof (Tab[0]),
1797 static void ResetStackOpData (StackOpData* Data)
1798 /* Reset the given data structure. OptStackOps calls this before it starts
** a new search for a pushax/op sequence.
*/
/* No zero page locations are marked as used */
1801 Data->ZPUsage = REG_NONE;
/* Forget the tracked loads for both operands */
1803 ClearLoadInfo (&Data->Lhs);
1804 ClearLoadInfo (&Data->Rhs);
/* NOTE(review): -1 presumably means that no pushax has been found - confirm */
1806 Data->PushIndex = -1;
1812 static int PreCondOk (StackOpData* D)
1813 /* Check if the preconditions for a call to the optimizer subfunction are
1814 ** satisfied. As a side effect, this function will also choose the zero page
1818 /* Check the flags */
1819 unsigned UnusedRegs = D->OptFunc->UnusedRegs;
/* If the table entry names registers in UnusedRegs, GetRegInfo must not
** report any of them for the code behind the operation.
*/
1820 if (UnusedRegs != REG_NONE &&
1821 (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) {
1822 /* Cannot optimize */
/* OP_A_KNOWN: The value of A on entry to the operation must be known */
1825 if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 &&
1826 RegValIsUnknown (D->OpEntry->RI->In.RegA)) {
1827 /* Cannot optimize */
/* OP_X_ZERO: The value of X on entry to the operation must be zero */
1830 if ((D->OptFunc->Flags & OP_X_ZERO) != 0 &&
1831 D->OpEntry->RI->In.RegX != 0) {
1832 /* Cannot optimize */
/* OP_LHS_LOAD: Both lhs registers need a valid load index.
** OP_LHS_LOAD_DIRECT: Both lhs registers need the LI_DIRECT flag.
*/
1835 if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0) {
1836 if (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0) {
1837 /* Cannot optimize */
1839 } else if ((D->OptFunc->Flags & OP_LHS_LOAD_DIRECT) != 0) {
1840 if ((D->Lhs.A.Flags & D->Lhs.X.Flags & LI_DIRECT) == 0) {
1841 /* Cannot optimize */
/* OP_RHS_LOAD: Both rhs registers need a valid load index.
** OP_RHS_LOAD_DIRECT: Both rhs registers need the LI_DIRECT flag.
*/
1846 if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0) {
1847 if (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0) {
1848 /* Cannot optimize */
1850 } else if ((D->OptFunc->Flags & OP_RHS_LOAD_DIRECT) != 0) {
1851 if ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) == 0) {
1852 /* Cannot optimize */
/* A duplicate load flagged for one of the rhs registers rules out the
** optimization.
*/
1857 if ((D->Rhs.A.Flags | D->Rhs.X.Flags) & LI_DUP_LOAD) {
1858 /* Cannot optimize */
1862 /* Determine the zero page locations to use. We've tracked the used
1863 ** ZP locations, so try to find some for us that are unused.
1865 if ((D->ZPUsage & REG_PTR1) == REG_NONE) {
1868 } else if ((D->ZPUsage & REG_SREG) == REG_NONE) {
1871 } else if ((D->ZPUsage & REG_PTR2) == REG_NONE) {
1875 /* No registers available */
1879 /* The code from the push up to the operation must be a basic block */
1880 return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex);
1885 /*****************************************************************************/
1887 /*****************************************************************************/
1891 unsigned OptStackOps (CodeSeg* S)
1892 /* Optimize operations that take operands via the stack. The entries of the
** code segment S are scanned for a call to pushax that is followed by a
** call to one of the functions in FuncTable. While scanning, register loads
** and zero page usage are tracked. If PreCondOk accepts the sequence, the
** optimizer function from the table entry is called to replace it. The
** changes reported by the optimizer functions are accumulated in Changes.
*/
1894 unsigned Changes = 0; /* Number of changes in one run */
1897 int OldEntryCount; /* Old number of entries */
1898 unsigned UsedRegs = 0; /* Registers used */
1899 unsigned ChangedRegs = 0;/* Registers changed */
1907 } State = Initialize;
1910 /* Remember the code segment in the info struct */
1913 /* Look for a call to pushax followed by a call to some other function
1914 ** that takes its first argument on the stack, and the second argument
1915 ** in the primary register.
1916 ** It depends on the code between the two if we can handle/transform the
1917 ** sequence, so check this code for the following list of things:
1919 ** - the range must be a basic block (one entry, one exit)
1920 ** - there may not be accesses to local variables with unknown
1921 ** offsets (because we have to adjust these offsets).
1922 ** - no subroutine calls
1925 ** Since we need a zero page register later, do also check the
1926 ** intermediate code for zero page use.
1929 while (I < (int)CS_GetEntryCount (S)) {
1931 /* Get the next entry */
1932 CodeEntry* E = CS_GetEntry (S, I);
1934 /* Actions depend on state */
1938 ResetStackOpData (&Data);
1939 UsedRegs = ChangedRegs = REG_NONE;
1944 /* While searching, track register load insns, so we can tell
1945 ** what is in a register once pushax is encountered.
1947 if (CE_HasLabel (E)) {
1948 /* Currently we don't track across branches */
1949 ClearLoadInfo (&Data.Lhs);
1951 if (CE_IsCallTo (E, "pushax")) {
1955 /* Track load insns */
1956 TrackLoads (&Data.Lhs, E, I);
1961 /* We've found a pushax before. Search for a stack op that may
1962 ** follow and in the meantime, track zeropage usage and check
1963 ** for code that will disable us from translating the sequence.
1965 if (CE_HasLabel (E)) {
1966 /* Currently we don't track across branches */
1967 ClearLoadInfo (&Data.Rhs);
1969 if (E->OPC == OP65_JSR) {
1971 /* Subroutine call: Check if this is one of the functions,
1972 ** we're going to replace.
1974 Data.OptFunc = FindFunc (E->Arg);
1976 /* Remember the op index and go on */
1981 } else if (!HarmlessCall (E->Arg)) {
1982 /* A call to an unknown subroutine: We need to start
1983 ** over after the last pushax. Note: This will also
1984 ** happen if we encounter a call to pushax!
1990 /* Track register usage */
1991 Data.ZPUsage |= (E->Use | E->Chg);
1992 TrackLoads (&Data.Rhs, E, I);
1995 } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) {
1997 /* Too dangerous - there may be a change of a variable
1998 ** within the sequence.
2004 } else if ((E->Use & REG_SP) != 0 &&
2005 (E->AM != AM65_ZP_INDY ||
2006 RegValIsUnknown (E->RI->In.RegY) ||
2007 E->RI->In.RegY < 2)) {
2009 /* If we are using the stack, and we don't have "indirect Y"
2010 ** addressing mode, or the value of Y is unknown, or less
2011 ** than two, we cannot cope with this piece of code. Having
2012 ** an unknown value of Y means that we cannot correct the
2013 ** stack offset, while having an offset less than two means
2014 ** that the code works with the value on stack which is to
2022 /* Other stuff: Track register usage */
2023 Data.ZPUsage |= (E->Use | E->Chg);
2024 TrackLoads (&Data.Rhs, E, I);
2026 /* If the registers from the push (A/X) are used before they're
2027 ** changed, we cannot change the sequence, because this would
2028 ** with a high probability change the register contents.
2031 if ((UsedRegs & ~ChangedRegs) & REG_AX) {
2036 ChangedRegs |= E->Chg;
2040 /* Track zero page location usage beyond this point */
2041 Data.ZPUsage |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2);
2043 /* Finalize the load info */
2044 FinalizeLoadInfo (&Data.Lhs, S);
2045 FinalizeLoadInfo (&Data.Rhs, S);
2047 /* Check if the lhs loads from zeropage. If this is true, these
2048 ** zero page locations have to be added to ZPUsage, because
2049 ** they cannot be used for intermediate storage. In addition,
2050 ** if one of these zero page locations is destroyed between
2051 ** pushing the lhs and the actual operation, we cannot use the
2052 ** original zero page locations for the final op, but must
2053 ** use another ZP location to save them.
2055 ChangedRegs &= REG_ZP;
2056 if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) {
2057 Data.ZPUsage |= Data.Lhs.A.LoadEntry->Use;
2058 if ((Data.Lhs.A.LoadEntry->Use & ChangedRegs) != 0) {
2059 Data.Lhs.A.Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
2062 if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) {
2063 Data.ZPUsage |= Data.Lhs.X.LoadEntry->Use;
2064 if ((Data.Lhs.X.LoadEntry->Use & ChangedRegs) != 0) {
2065 Data.Lhs.X.Flags &= ~(LI_DIRECT | LI_RELOAD_Y);
2069 /* Check the preconditions. If they aren't ok, reset the insn
2070 ** pointer to the pushax and start over. We will lose part of
2071 ** load tracking, but a/x has probably been lost between
2072 ** pushax and here anyway and will be tracked again when restarting.
2074 if (!PreCondOk (&Data)) {
2080 /* Prepare the remainder of the data structure. */
2081 Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex);
2082 Data.PushEntry = CS_GetEntry (S, Data.PushIndex);
2083 Data.OpEntry = CS_GetEntry (S, Data.OpIndex);
2084 Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex);
2086 /* Remember the current number of code lines */
2087 OldEntryCount = CS_GetEntryCount (S);
2089 /* Adjust stack offsets to account for the upcoming removal */
2090 AdjustStackOffset (&Data, 2);
2092 /* Regenerate register info, since AdjustStackOffset changed
2097 /* Call the optimizer function */
2098 Changes += Data.OptFunc->Func (&Data);
2100 /* Since the function may have added or deleted entries,
2101 ** correct the index.
2103 I += CS_GetEntryCount (S) - OldEntryCount;
2105 /* Regenerate register info */
2119 /* Return the number of changes made */