1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
50 /*****************************************************************************/
52 /*****************************************************************************/
56 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
58 /* Adjust the offset for all stack accesses in the range Start to Stop, both
59 * inclusive. The function returns the number of instructions that have been
63 /* Number of inserted instructions */
64 unsigned Inserted = 0;
66 /* Walk over all entries */
70 CodeEntry* E = CS_GetEntry (S, I);
72 if (E->Use & REG_SP) {
76 /* Check for some things that should not happen */
77 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
78 CHECK (strcmp (E->Arg, "sp") == 0);
80 /* Get the code entry before this one. If it's a LDY, adjust the
83 P = CS_GetPrevEntry (S, I);
84 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
86 /* The Y load is just before the stack access, adjust it */
87 CE_SetNumArg (P, P->Num - Offs);
91 /* Insert a new load instruction before the stack access */
94 xsprintf (Buf, sizeof (Buf), "$%02X", E->RI->In.RegY - Offs);
95 X = NewCodeEntry (OP65_LDY, AM65_IMM, Buf, 0, E->LI);
96 CS_InsertEntry (S, X, I);
98 /* One more inserted entries */
102 /* Be sure to skip the stack access for the next round */
113 /* Return the number of inserted entries */
119 /*****************************************************************************/
120 /* Actual optimization functions */
121 /*****************************************************************************/
125 static unsigned Opt_staspidx (CodeSeg* S, unsigned Push, unsigned Store,
126 const char* ZPLo, const char* ZPHi)
127 /* Optimize the staspidx sequence if possible */
130 CodeEntry* PushEntry;
131 CodeEntry* StoreEntry;
133 /* Get the push entry */
134 PushEntry = CS_GetEntry (S, Push);
136 /* Store the value into the zeropage instead of pushing it */
137 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
138 CS_InsertEntry (S, X, Push+1);
139 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
140 CS_InsertEntry (S, X, Push+2);
142 /* Correct the index of the store and get a pointer to the entry */
144 StoreEntry = CS_GetEntry (S, Store);
146 /* Inline the store */
147 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
148 CS_InsertEntry (S, X, Store+1);
150 /* Remove the push and the call to the staspidx function */
151 CS_DelEntry (S, Store);
152 CS_DelEntry (S, Push);
154 /* We changed the sequence */
160 static unsigned Opt_tosaddax (CodeSeg* S, unsigned Push, unsigned Add,
161 const char* ZPLo, const char* ZPHi)
162 /* Optimize the tosaddax sequence if possible */
167 CodeEntry* PushEntry;
172 /* We need the entry behind the add */
173 CHECK ((N = CS_GetNextEntry (S, Add)) != 0);
175 /* And the entry before the push */
176 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
178 /* Get the push entry */
179 PushEntry = CS_GetEntry (S, Push);
181 /* Check the entry before the push, if it's a lda instruction with an
182 * addressing mode that does not use an additional index register. If
183 * so, we may use this location for the add and must not save the
184 * value in the zero page location.
186 DirectAdd = (P->OPC == OP65_LDA &&
187 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
189 /* Store the value into the zeropage instead of pushing it */
190 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
191 CS_InsertEntry (S, X, Push+1);
192 ++Add; /* Correct the index */
194 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
195 CS_InsertEntry (S, X, Push+1);
196 ++Add; /* Correct the index */
199 /* Get a pointer to the add entry */
200 AddEntry = CS_GetEntry (S, Add);
203 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, AddEntry->LI);
204 CS_InsertEntry (S, X, Add+1);
206 /* Add from variable location */
207 X = NewCodeEntry (OP65_ADC, P->AM, P->Arg, 0, AddEntry->LI);
209 /* Add from temp storage */
210 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPLo, 0, AddEntry->LI);
212 CS_InsertEntry (S, X, Add+2);
213 if (PushEntry->RI->In.RegX == 0) {
214 /* The high byte is the value in X plus the carry */
215 CodeLabel* L = CS_GenLabel (S, N);
216 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
217 CS_InsertEntry (S, X, Add+3);
218 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
219 CS_InsertEntry (S, X, Add+4);
220 } else if (AddEntry->RI->In.RegX == 0) {
221 /* The high byte is that of the first operand plus carry */
223 if (PushEntry->RI->In.RegX >= 0) {
224 /* Value of first op high byte is known */
226 xsprintf (Buf, sizeof (Buf), "$%02X", PushEntry->RI->In.RegX);
227 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, AddEntry->LI);
229 /* Value of first op high byte is unknown */
230 X = NewCodeEntry (OP65_LDX, AM65_ZP, ZPHi, 0, AddEntry->LI);
232 CS_InsertEntry (S, X, Add+3);
233 L = CS_GenLabel (S, N);
234 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
235 CS_InsertEntry (S, X, Add+4);
236 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
237 CS_InsertEntry (S, X, Add+5);
239 /* High byte is unknown */
240 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AddEntry->LI);
241 CS_InsertEntry (S, X, Add+3);
242 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AddEntry->LI);
243 CS_InsertEntry (S, X, Add+4);
244 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPHi, 0, AddEntry->LI);
245 CS_InsertEntry (S, X, Add+5);
246 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AddEntry->LI);
247 CS_InsertEntry (S, X, Add+6);
248 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AddEntry->LI);
249 CS_InsertEntry (S, X, Add+7);
252 /* Remove the push and the call to the tosaddax function */
253 CS_DelEntry (S, Add);
254 CS_DelEntry (S, Push);
256 /* We changed the sequence */
262 static unsigned Opt_tosandax (CodeSeg* S, unsigned Push, unsigned And,
263 const char* ZPLo, const char* ZPHi)
264 /* Optimize the tosandax sequence if possible */
268 CodeEntry* PushEntry;
272 /* Get the entry before the push */
273 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
275 /* Get the push entry */
276 PushEntry = CS_GetEntry (S, Push);
278 /* Check the entry before the push, if it's a lda instruction with an
279 * addressing mode that does not use an additional index register. If
280 * so, we may use this location for the and and must not save the
281 * value in the zero page location.
283 DirectAnd = (P->OPC == OP65_LDA &&
284 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
286 /* Store the value into the zeropage instead of pushing it */
287 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
288 CS_InsertEntry (S, X, Push+1);
289 ++And; /* Correct the index */
291 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
292 CS_InsertEntry (S, X, Push+1);
293 ++And; /* Correct the index */
296 /* Get a pointer to the and entry */
297 AndEntry = CS_GetEntry (S, And);
301 /* And with variable location */
302 X = NewCodeEntry (OP65_AND, P->AM, P->Arg, 0, AndEntry->LI);
304 /* And with temp storage */
305 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPLo, 0, AndEntry->LI);
307 CS_InsertEntry (S, X, And+1);
308 if (PushEntry->RI->In.RegX == 0 || AndEntry->RI->In.RegX == 0) {
309 /* The high byte is zero */
310 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, AndEntry->LI);
311 CS_InsertEntry (S, X, And+2);
313 /* High byte is unknown */
314 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AndEntry->LI);
315 CS_InsertEntry (S, X, And+2);
316 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AndEntry->LI);
317 CS_InsertEntry (S, X, And+3);
318 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPHi, 0, AndEntry->LI);
319 CS_InsertEntry (S, X, And+4);
320 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AndEntry->LI);
321 CS_InsertEntry (S, X, And+5);
322 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AndEntry->LI);
323 CS_InsertEntry (S, X, And+6);
326 /* Remove the push and the call to the tosandax function */
327 CS_DelEntry (S, And);
328 CS_DelEntry (S, Push);
330 /* We changed the sequence */
336 static unsigned Opt_tosorax (CodeSeg* S, unsigned Push, unsigned Or,
337 const char* ZPLo, const char* ZPHi)
338 /* Optimize the tosorax sequence if possible */
342 CodeEntry* PushEntry;
346 /* Get the entry before the push */
347 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
349 /* Get the push entry */
350 PushEntry = CS_GetEntry (S, Push);
352 /* Check the entry before the push, if it's a lda instruction with an
353 * addressing mode that does not use an additional index register. If
354 * so, we may use this location for the or and must not save the
355 * value in the zero page location.
357 DirectOr = (P->OPC == OP65_LDA &&
358 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
360 /* Store the value into the zeropage instead of pushing it */
361 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
362 CS_InsertEntry (S, X, Push+1);
363 ++Or; /* Correct the index */
365 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
366 CS_InsertEntry (S, X, Push+1);
367 ++Or; /* Correct the index */
370 /* Get a pointer to the or entry */
371 OrEntry = CS_GetEntry (S, Or);
375 /* Or with variable location */
376 X = NewCodeEntry (OP65_ORA, P->AM, P->Arg, 0, OrEntry->LI);
378 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPLo, 0, OrEntry->LI);
380 CS_InsertEntry (S, X, Or+1);
381 if (PushEntry->RI->In.RegX >= 0 && OrEntry->RI->In.RegX >= 0) {
382 /* Both values known, precalculate the result */
384 int Val = (PushEntry->RI->In.RegX | OrEntry->RI->In.RegX);
385 xsprintf (Buf, sizeof (Buf), "$%02X", Val);
386 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, OrEntry->LI);
387 CS_InsertEntry (S, X, Or+2);
388 } else if (PushEntry->RI->In.RegX != 0) {
389 /* High byte is unknown */
390 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, OrEntry->LI);
391 CS_InsertEntry (S, X, Or+2);
392 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, OrEntry->LI);
393 CS_InsertEntry (S, X, Or+3);
394 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPHi, 0, OrEntry->LI);
395 CS_InsertEntry (S, X, Or+4);
396 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, OrEntry->LI);
397 CS_InsertEntry (S, X, Or+5);
398 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, OrEntry->LI);
399 CS_InsertEntry (S, X, Or+6);
402 /* Remove the push and the call to the tosandax function */
404 CS_DelEntry (S, Push);
406 /* We changed the sequence */
412 static unsigned Opt_tosxorax (CodeSeg* S, unsigned Push, unsigned Xor,
413 const char* ZPLo, const char* ZPHi)
414 /* Optimize the tosxorax sequence if possible */
418 CodeEntry* PushEntry;
422 /* Get the entry before the push */
423 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
425 /* Get the push entry */
426 PushEntry = CS_GetEntry (S, Push);
428 /* Check the entry before the push, if it's a lda instruction with an
429 * addressing mode that does not use an additional index register. If
430 * so, we may use this location for the xor and must not save the
431 * value in the zero page location.
433 DirectXor = (P->OPC == OP65_LDA &&
434 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
436 /* Store the value into the zeropage instead of pushing it */
437 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
438 CS_InsertEntry (S, X, Push+1);
439 ++Xor; /* Correct the index */
441 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
442 CS_InsertEntry (S, X, Push+1);
443 ++Xor; /* Correct the index */
446 /* Get a pointer to the entry */
447 XorEntry = CS_GetEntry (S, Xor);
451 /* Xor with variable location */
452 X = NewCodeEntry (OP65_EOR, P->AM, P->Arg, 0, XorEntry->LI);
454 /* Xor with temp storage */
455 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPLo, 0, XorEntry->LI);
457 CS_InsertEntry (S, X, Xor+1);
458 if (PushEntry->RI->In.RegX >= 0 && XorEntry->RI->In.RegX >= 0) {
459 /* Both values known, precalculate the result */
461 int Val = (PushEntry->RI->In.RegX ^ XorEntry->RI->In.RegX);
462 xsprintf (Buf, sizeof (Buf), "$%02X", Val);
463 X = NewCodeEntry (OP65_LDX, AM65_IMM, Buf, 0, XorEntry->LI);
464 CS_InsertEntry (S, X, Xor+2);
465 } else if (PushEntry->RI->In.RegX != 0) {
466 /* High byte is unknown */
467 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, XorEntry->LI);
468 CS_InsertEntry (S, X, Xor+2);
469 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, XorEntry->LI);
470 CS_InsertEntry (S, X, Xor+3);
471 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPHi, 0, XorEntry->LI);
472 CS_InsertEntry (S, X, Xor+4);
473 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, XorEntry->LI);
474 CS_InsertEntry (S, X, Xor+5);
475 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, XorEntry->LI);
476 CS_InsertEntry (S, X, Xor+6);
479 /* Remove the push and the call to the tosandax function */
480 CS_DelEntry (S, Xor);
481 CS_DelEntry (S, Push);
483 /* We changed the sequence */
489 /*****************************************************************************/
491 /*****************************************************************************/
495 typedef unsigned (*OptFunc) (CodeSeg* S, unsigned Push, unsigned Store,
496 const char* ZPLo, const char* ZPHi);
497 typedef struct OptFuncDesc OptFuncDesc;
499 const char* Name; /* Name of the replaced runtime function */
500 OptFunc Func; /* Function pointer */
503 static const OptFuncDesc FuncTable[] = {
504 { "staspidx", Opt_staspidx },
505 { "tosaddax", Opt_tosaddax },
506 { "tosandax", Opt_tosandax },
507 { "tosorax", Opt_tosorax },
508 { "tosxorax", Opt_tosxorax },
510 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
514 static int CmpFunc (const void* Key, const void* Func)
515 /* Compare function for bsearch */
517 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
522 static const OptFuncDesc* FindFunc (const char* Name)
523 /* Find the function with the given name. Return a pointer to the table entry
524 * or NULL if the function was not found.
527 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
532 /*****************************************************************************/
534 /*****************************************************************************/
538 unsigned OptStackOps (CodeSeg* S)
539 /* Optimize operations that take operands via the stack */
541 unsigned Changes = 0; /* Number of changes in one run */
542 int InSeq = 0; /* Inside a sequence */
543 unsigned Push = 0; /* Index of pushax */
544 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
547 /* Generate register info */
550 /* Look for a call to pushax followed by a call to some other function
551 * that takes it's first argument on the stack, and the second argument
552 * in the primary register.
553 * It depends on the code between the two if we can handle/transform the
554 * sequence, so check this code for the following list of things:
556 * - there must not be a jump or conditional branch (this may
557 * get relaxed later).
558 * - there may not be accesses to local variables with unknown
559 * offsets (because we have to adjust these offsets).
560 * - no subroutine calls
563 * Since we need a zero page register later, do also check the
564 * intermediate code for zero page use.
567 while (I < CS_GetEntryCount (S)) {
569 /* Get the next entry */
570 CodeEntry* E = CS_GetEntry (S, I);
572 /* Handling depends if we're inside a sequence or not */
575 if ((E->Info & OF_BRA) != 0 ||
576 ((E->Use & REG_SP) != 0 &&
577 (E->AM != AM65_ZP_INDY || E->RI->In.RegY < 0)) ||
580 /* All this stuff is not allowed in a sequence */
583 } else if (E->OPC == OP65_JSR) {
585 /* Subroutine call: Check if this is one of our functions */
586 const OptFuncDesc* F = FindFunc (E->Arg);
589 /* Determine the register to use */
592 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
593 if ((UsedRegs & REG_SREG) == REG_NONE) {
594 /* SREG is available */
597 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
600 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
604 /* No registers available */
609 /* If we have a register, call the optimizer function */
612 /* Adjust stack offsets */
613 unsigned Op = I + AdjustStackOffset (S, Push, I, 2);
615 /* Call the optimizer function */
616 Changes += F->Func (S, Push, Op, ZPLo, ZPHi);
618 /* Regenerate register info */
622 /* End of sequence */
625 } else if (strcmp (E->Arg, "pushax") == 0) {
626 /* Restart the sequence */
630 /* A call to an unkown subroutine ends the sequence */
636 /* Other stuff: Track zeropage register usage */
637 UsedRegs |= (E->Use | E->Chg);
641 } else if (CE_IsCall (E, "pushax")) {
643 /* This starts a sequence */
655 /* Free the register info */
658 /* Return the number of changes made */