1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
50 /*****************************************************************************/
52 /*****************************************************************************/
56 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
58 /* Adjust the offset for all stack accesses in the range Start to Stop, both
59 * inclusive. The function returns the number of instructions that have been
63 /* Number of inserted instructions */
64 unsigned Inserted = 0;
66 /* Walk over all entries */
70 CodeEntry* E = CS_GetEntry (S, I);
72 if (E->Use & REG_SP) {
76 /* Check for some things that should not happen */
77 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
78 CHECK (strcmp (E->Arg, "sp") == 0);
80 /* Get the code entry before this one. If it's a LDY, adjust the
83 P = CS_GetPrevEntry (S, I);
84 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
86 /* The Y load is just before the stack access, adjust it */
87 CE_SetNumArg (P, P->Num - Offs);
91 /* Insert a new load instruction before the stack access */
94 xsprintf (Buf, sizeof (Buf), "$%02X", E->RI->In.RegY - Offs);
95 X = NewCodeEntry (OP65_LDY, AM65_IMM, Buf, 0, E->LI);
96 CS_InsertEntry (S, X, I);
98 /* One more inserted entries */
102 /* Be sure to skip the stack access for the next round */
113 /* Return the number of inserted entries */
119 /*****************************************************************************/
120 /* Actual optimization functions */
121 /*****************************************************************************/
125 static unsigned Opt_staspidx (CodeSeg* S, unsigned Push, unsigned Store,
126 const char* ZPLo, const char* ZPHi)
127 /* Optimize the staspidx sequence if possible */
130 CodeEntry* PushEntry;
131 CodeEntry* StoreEntry;
133 /* Get the push entry */
134 PushEntry = CS_GetEntry (S, Push);
136 /* Store the value into the zeropage instead of pushing it */
137 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
138 CS_InsertEntry (S, X, Push+1);
139 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
140 CS_InsertEntry (S, X, Push+2);
142 /* Correct the index of the store and get a pointer to the entry */
144 StoreEntry = CS_GetEntry (S, Store);
146 /* Inline the store */
147 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
148 CS_InsertEntry (S, X, Store+1);
150 /* Remove the push and the call to the staspidx function */
151 CS_DelEntry (S, Store);
152 CS_DelEntry (S, Push);
154 /* We changed the sequence */
160 static unsigned Opt_staxspidx (CodeSeg* S, unsigned Push, unsigned Store,
161 const char* ZPLo, const char* ZPHi)
162 /* Optimize the staxspidx sequence if possible */
165 CodeEntry* PushEntry;
166 CodeEntry* StoreEntry;
168 /* Get the push entry */
169 PushEntry = CS_GetEntry (S, Push);
171 /* Store the value into the zeropage instead of pushing it */
172 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
173 CS_InsertEntry (S, X, Push+1);
174 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
175 CS_InsertEntry (S, X, Push+2);
177 /* Correct the index of the store and get a pointer to the entry */
179 StoreEntry = CS_GetEntry (S, Store);
181 /* Inline the store */
182 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
183 CS_InsertEntry (S, X, Store+1);
184 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, StoreEntry->LI);
185 CS_InsertEntry (S, X, Store+2);
186 if (StoreEntry->RI->In.RegX >= 0) {
187 /* Value of X is known */
189 xsprintf (Buf, sizeof (Buf), "$%02X", StoreEntry->RI->In.RegX);
190 X = NewCodeEntry (OP65_LDA, AM65_IMM, Buf, 0, StoreEntry->LI);
193 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, StoreEntry->LI);
195 CS_InsertEntry (S, X, Store+3);
196 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
197 CS_InsertEntry (S, X, Store+4);
199 /* Remove the push and the call to the staspidx function */
200 CS_DelEntry (S, Store);
201 CS_DelEntry (S, Push);
203 /* We changed the sequence */
209 static unsigned Opt_tosaddax (CodeSeg* S, unsigned Push, unsigned Add,
210 const char* ZPLo, const char* ZPHi)
211 /* Optimize the tosaddax sequence if possible */
216 CodeEntry* PushEntry;
221 /* We need the entry behind the add */
222 CHECK ((N = CS_GetNextEntry (S, Add)) != 0);
224 /* And the entry before the push */
225 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
227 /* Get the push entry */
228 PushEntry = CS_GetEntry (S, Push);
230 /* Check the entry before the push, if it's a lda instruction with an
231 * addressing mode that does not use an additional index register. If
232 * so, we may use this location for the add and must not save the
233 * value in the zero page location.
235 DirectAdd = (P->OPC == OP65_LDA &&
236 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
238 /* Store the value into the zeropage instead of pushing it */
239 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
240 CS_InsertEntry (S, X, Push+1);
241 ++Add; /* Correct the index */
243 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
244 CS_InsertEntry (S, X, Push+1);
245 ++Add; /* Correct the index */
248 /* Get a pointer to the add entry */
249 AddEntry = CS_GetEntry (S, Add);
252 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, AddEntry->LI);
253 CS_InsertEntry (S, X, Add+1);
255 /* Add from variable location */
256 X = NewCodeEntry (OP65_ADC, P->AM, P->Arg, 0, AddEntry->LI);
258 /* Add from temp storage */
259 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPLo, 0, AddEntry->LI);
261 CS_InsertEntry (S, X, Add+2);
262 if (PushEntry->RI->In.RegX == 0) {
263 /* The high byte is the value in X plus the carry */
264 CodeLabel* L = CS_GenLabel (S, N);
265 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
266 CS_InsertEntry (S, X, Add+3);
267 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
268 CS_InsertEntry (S, X, Add+4);
269 } else if (AddEntry->RI->In.RegX == 0) {
270 /* The high byte is that of the first operand plus carry */
272 if (PushEntry->RI->In.RegX >= 0) {
273 /* Value of first op high byte is known */
275 xsprintf (Buf, sizeof (Buf), "$%02X", PushEntry->RI->In.RegX);
276 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, AddEntry->LI);
278 /* Value of first op high byte is unknown */
279 X = NewCodeEntry (OP65_LDX, AM65_ZP, ZPHi, 0, AddEntry->LI);
281 CS_InsertEntry (S, X, Add+3);
282 L = CS_GenLabel (S, N);
283 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
284 CS_InsertEntry (S, X, Add+4);
285 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
286 CS_InsertEntry (S, X, Add+5);
288 /* High byte is unknown */
289 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AddEntry->LI);
290 CS_InsertEntry (S, X, Add+3);
291 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AddEntry->LI);
292 CS_InsertEntry (S, X, Add+4);
293 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPHi, 0, AddEntry->LI);
294 CS_InsertEntry (S, X, Add+5);
295 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AddEntry->LI);
296 CS_InsertEntry (S, X, Add+6);
297 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AddEntry->LI);
298 CS_InsertEntry (S, X, Add+7);
301 /* Remove the push and the call to the tosaddax function */
302 CS_DelEntry (S, Add);
303 CS_DelEntry (S, Push);
305 /* We changed the sequence */
311 static unsigned Opt_tosandax (CodeSeg* S, unsigned Push, unsigned And,
312 const char* ZPLo, const char* ZPHi)
313 /* Optimize the tosandax sequence if possible */
317 CodeEntry* PushEntry;
321 /* Get the entry before the push */
322 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
324 /* Get the push entry */
325 PushEntry = CS_GetEntry (S, Push);
327 /* Check the entry before the push, if it's a lda instruction with an
328 * addressing mode that does not use an additional index register. If
329 * so, we may use this location for the and and must not save the
330 * value in the zero page location.
332 DirectAnd = (P->OPC == OP65_LDA &&
333 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
335 /* Store the value into the zeropage instead of pushing it */
336 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
337 CS_InsertEntry (S, X, Push+1);
338 ++And; /* Correct the index */
340 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
341 CS_InsertEntry (S, X, Push+1);
342 ++And; /* Correct the index */
345 /* Get a pointer to the and entry */
346 AndEntry = CS_GetEntry (S, And);
350 /* And with variable location */
351 X = NewCodeEntry (OP65_AND, P->AM, P->Arg, 0, AndEntry->LI);
353 /* And with temp storage */
354 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPLo, 0, AndEntry->LI);
356 CS_InsertEntry (S, X, And+1);
357 if (PushEntry->RI->In.RegX == 0 || AndEntry->RI->In.RegX == 0) {
358 /* The high byte is zero */
359 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, AndEntry->LI);
360 CS_InsertEntry (S, X, And+2);
362 /* High byte is unknown */
363 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AndEntry->LI);
364 CS_InsertEntry (S, X, And+2);
365 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AndEntry->LI);
366 CS_InsertEntry (S, X, And+3);
367 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPHi, 0, AndEntry->LI);
368 CS_InsertEntry (S, X, And+4);
369 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AndEntry->LI);
370 CS_InsertEntry (S, X, And+5);
371 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AndEntry->LI);
372 CS_InsertEntry (S, X, And+6);
375 /* Remove the push and the call to the tosandax function */
376 CS_DelEntry (S, And);
377 CS_DelEntry (S, Push);
379 /* We changed the sequence */
385 static unsigned Opt_tosorax (CodeSeg* S, unsigned Push, unsigned Or,
386 const char* ZPLo, const char* ZPHi)
387 /* Optimize the tosorax sequence if possible */
391 CodeEntry* PushEntry;
395 /* Get the entry before the push */
396 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
398 /* Get the push entry */
399 PushEntry = CS_GetEntry (S, Push);
401 /* Check the entry before the push, if it's a lda instruction with an
402 * addressing mode that does not use an additional index register. If
403 * so, we may use this location for the or and must not save the
404 * value in the zero page location.
406 DirectOr = (P->OPC == OP65_LDA &&
407 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
409 /* Store the value into the zeropage instead of pushing it */
410 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
411 CS_InsertEntry (S, X, Push+1);
412 ++Or; /* Correct the index */
414 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
415 CS_InsertEntry (S, X, Push+1);
416 ++Or; /* Correct the index */
419 /* Get a pointer to the or entry */
420 OrEntry = CS_GetEntry (S, Or);
424 /* Or with variable location */
425 X = NewCodeEntry (OP65_ORA, P->AM, P->Arg, 0, OrEntry->LI);
427 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPLo, 0, OrEntry->LI);
429 CS_InsertEntry (S, X, Or+1);
430 if (PushEntry->RI->In.RegX >= 0 && OrEntry->RI->In.RegX >= 0) {
431 /* Both values known, precalculate the result */
433 int Val = (PushEntry->RI->In.RegX | OrEntry->RI->In.RegX);
434 xsprintf (Buf, sizeof (Buf), "$%02X", Val);
435 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, OrEntry->LI);
436 CS_InsertEntry (S, X, Or+2);
437 } else if (PushEntry->RI->In.RegX != 0) {
438 /* High byte is unknown */
439 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, OrEntry->LI);
440 CS_InsertEntry (S, X, Or+2);
441 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, OrEntry->LI);
442 CS_InsertEntry (S, X, Or+3);
443 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPHi, 0, OrEntry->LI);
444 CS_InsertEntry (S, X, Or+4);
445 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, OrEntry->LI);
446 CS_InsertEntry (S, X, Or+5);
447 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, OrEntry->LI);
448 CS_InsertEntry (S, X, Or+6);
451 /* Remove the push and the call to the tosandax function */
453 CS_DelEntry (S, Push);
455 /* We changed the sequence */
461 static unsigned Opt_tosxorax (CodeSeg* S, unsigned Push, unsigned Xor,
462 const char* ZPLo, const char* ZPHi)
463 /* Optimize the tosxorax sequence if possible */
467 CodeEntry* PushEntry;
471 /* Get the entry before the push */
472 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
474 /* Get the push entry */
475 PushEntry = CS_GetEntry (S, Push);
477 /* Check the entry before the push, if it's a lda instruction with an
478 * addressing mode that does not use an additional index register. If
479 * so, we may use this location for the xor and must not save the
480 * value in the zero page location.
482 DirectXor = (P->OPC == OP65_LDA &&
483 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
485 /* Store the value into the zeropage instead of pushing it */
486 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
487 CS_InsertEntry (S, X, Push+1);
488 ++Xor; /* Correct the index */
490 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
491 CS_InsertEntry (S, X, Push+1);
492 ++Xor; /* Correct the index */
495 /* Get a pointer to the entry */
496 XorEntry = CS_GetEntry (S, Xor);
500 /* Xor with variable location */
501 X = NewCodeEntry (OP65_EOR, P->AM, P->Arg, 0, XorEntry->LI);
503 /* Xor with temp storage */
504 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPLo, 0, XorEntry->LI);
506 CS_InsertEntry (S, X, Xor+1);
507 if (PushEntry->RI->In.RegX >= 0 && XorEntry->RI->In.RegX >= 0) {
508 /* Both values known, precalculate the result */
510 int Val = (PushEntry->RI->In.RegX ^ XorEntry->RI->In.RegX);
511 xsprintf (Buf, sizeof (Buf), "$%02X", Val);
512 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, XorEntry->LI);
513 CS_InsertEntry (S, X, Xor+2);
514 } else if (PushEntry->RI->In.RegX != 0) {
515 /* High byte is unknown */
516 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, XorEntry->LI);
517 CS_InsertEntry (S, X, Xor+2);
518 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, XorEntry->LI);
519 CS_InsertEntry (S, X, Xor+3);
520 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPHi, 0, XorEntry->LI);
521 CS_InsertEntry (S, X, Xor+4);
522 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, XorEntry->LI);
523 CS_InsertEntry (S, X, Xor+5);
524 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, XorEntry->LI);
525 CS_InsertEntry (S, X, Xor+6);
528 /* Remove the push and the call to the tosandax function */
529 CS_DelEntry (S, Xor);
530 CS_DelEntry (S, Push);
532 /* We changed the sequence */
538 /*****************************************************************************/
540 /*****************************************************************************/
544 /* Flags for the functions */
546 STOP_NONE, /* Nothing special */
547 STOP_A_UNUSED /* Call only if a unused later */
551 typedef unsigned (*OptFunc) (CodeSeg* S, unsigned Push, unsigned Store,
552 const char* ZPLo, const char* ZPHi);
553 typedef struct OptFuncDesc OptFuncDesc;
555 const char* Name; /* Name of the replaced runtime function */
556 OptFunc Func; /* Function pointer */
557 STOP_FLAGS Flags; /* Flags */
560 static const OptFuncDesc FuncTable[] = {
561 { "staspidx", Opt_staspidx, STOP_NONE },
562 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
563 { "tosaddax", Opt_tosaddax, STOP_NONE },
564 { "tosandax", Opt_tosandax, STOP_NONE },
565 { "tosorax", Opt_tosorax, STOP_NONE },
566 { "tosxorax", Opt_tosxorax, STOP_NONE },
568 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
572 static int CmpFunc (const void* Key, const void* Func)
573 /* Compare function for bsearch */
575 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
580 static const OptFuncDesc* FindFunc (const char* Name)
581 /* Find the function with the given name. Return a pointer to the table entry
582 * or NULL if the function was not found.
585 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
590 /*****************************************************************************/
592 /*****************************************************************************/
596 unsigned OptStackOps (CodeSeg* S)
597 /* Optimize operations that take operands via the stack */
599 unsigned Changes = 0; /* Number of changes in one run */
600 int InSeq = 0; /* Inside a sequence */
601 unsigned Push = 0; /* Index of pushax */
602 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
605 /* Generate register info */
608 /* Look for a call to pushax followed by a call to some other function
609 * that takes it's first argument on the stack, and the second argument
610 * in the primary register.
611 * It depends on the code between the two if we can handle/transform the
612 * sequence, so check this code for the following list of things:
614 * - there must not be a jump or conditional branch (this may
615 * get relaxed later).
616 * - there may not be accesses to local variables with unknown
617 * offsets (because we have to adjust these offsets).
618 * - no subroutine calls
621 * Since we need a zero page register later, do also check the
622 * intermediate code for zero page use.
625 while (I < CS_GetEntryCount (S)) {
627 /* Get the next entry */
628 CodeEntry* E = CS_GetEntry (S, I);
630 /* Handling depends if we're inside a sequence or not */
633 if ((E->Info & OF_BRA) != 0 ||
634 ((E->Use & REG_SP) != 0 &&
635 (E->AM != AM65_ZP_INDY || E->RI->In.RegY < 0)) ||
638 /* All this stuff is not allowed in a sequence */
641 } else if (E->OPC == OP65_JSR) {
643 /* Subroutine call: Check if this is one of our functions */
644 const OptFuncDesc* F = FindFunc (E->Arg);
647 const char* ZPLo = 0;
648 const char* ZPHi = 0;
651 /* Check the flags */
652 if (F->Flags & STOP_A_UNUSED) {
653 /* a must be unused later */
654 if (RegAUsed (S, I+1)) {
655 /* Cannot optimize */
660 /* Determine the zero page locations to use */
662 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
663 if ((UsedRegs & REG_SREG) == REG_NONE) {
664 /* SREG is available */
667 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
670 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
674 /* No registers available */
679 /* If preconditions are ok, call the optimizer function */
682 /* Adjust stack offsets */
683 unsigned Op = I + AdjustStackOffset (S, Push, I, 2);
685 /* Call the optimizer function */
686 Changes += F->Func (S, Push, Op, ZPLo, ZPHi);
688 /* Regenerate register info */
692 /* End of sequence */
695 } else if (strcmp (E->Arg, "pushax") == 0) {
696 /* Restart the sequence */
700 /* A call to an unkown subroutine ends the sequence */
706 /* Other stuff: Track zeropage register usage */
707 UsedRegs |= (E->Use | E->Chg);
711 } else if (CE_IsCall (E, "pushax")) {
713 /* This starts a sequence */
725 /* Free the register info */
728 /* Return the number of changes made */