1 /*****************************************************************************/
5 /* Optimize operations that take operands via the stack */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
47 /*****************************************************************************/
49 /*****************************************************************************/
53 static unsigned AdjustStackOffset (CodeSeg* S, unsigned Start, unsigned Stop,
55 /* Adjust the offset for all stack accesses in the range Start to Stop, both
56 * inclusive. The function returns the number of instructions that have been
60 /* Number of inserted instructions */
61 unsigned Inserted = 0;
63 /* Walk over all entries */
67 CodeEntry* E = CS_GetEntry (S, I);
69 if (E->Use & REG_SP) {
73 /* Check for some things that should not happen */
74 CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs);
75 CHECK (strcmp (E->Arg, "sp") == 0);
77 /* Get the code entry before this one. If it's a LDY, adjust the
80 P = CS_GetPrevEntry (S, I);
81 if (P && P->OPC == OP65_LDY && CE_KnownImm (P)) {
83 /* The Y load is just before the stack access, adjust it */
84 CE_SetNumArg (P, P->Num - Offs);
88 /* Insert a new load instruction before the stack access */
89 const char* Arg = MakeHexArg (E->RI->In.RegY - Offs);
90 CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
91 CS_InsertEntry (S, X, I);
93 /* One more inserted entries */
97 /* Be sure to skip the stack access for the next round */
108 /* Return the number of inserted entries */
114 /*****************************************************************************/
115 /* Actual optimization functions */
116 /*****************************************************************************/
120 static unsigned Opt_staspidx (CodeSeg* S, unsigned Push, unsigned Store,
121 const char* ZPLo, const char* ZPHi)
122 /* Optimize the staspidx sequence if possible */
125 CodeEntry* PushEntry;
126 CodeEntry* StoreEntry;
128 /* Get the push entry */
129 PushEntry = CS_GetEntry (S, Push);
131 /* Store the value into the zeropage instead of pushing it */
132 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
133 CS_InsertEntry (S, X, Push+1);
134 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
135 CS_InsertEntry (S, X, Push+2);
137 /* Correct the index of the store and get a pointer to the entry */
139 StoreEntry = CS_GetEntry (S, Store);
141 /* Inline the store */
142 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
143 CS_InsertEntry (S, X, Store+1);
145 /* Remove the push and the call to the staspidx function */
146 CS_DelEntry (S, Store);
147 CS_DelEntry (S, Push);
149 /* We changed the sequence */
155 static unsigned Opt_staxspidx (CodeSeg* S, unsigned Push, unsigned Store,
156 const char* ZPLo, const char* ZPHi)
157 /* Optimize the staxspidx sequence if possible */
160 CodeEntry* PushEntry;
161 CodeEntry* StoreEntry;
163 /* Get the push entry */
164 PushEntry = CS_GetEntry (S, Push);
166 /* Store the value into the zeropage instead of pushing it */
167 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
168 CS_InsertEntry (S, X, Push+1);
169 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
170 CS_InsertEntry (S, X, Push+2);
172 /* Correct the index of the store and get a pointer to the entry */
174 StoreEntry = CS_GetEntry (S, Store);
176 /* Inline the store */
177 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
178 CS_InsertEntry (S, X, Store+1);
179 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, StoreEntry->LI);
180 CS_InsertEntry (S, X, Store+2);
181 if (StoreEntry->RI->In.RegX >= 0) {
182 /* Value of X is known */
183 const char* Arg = MakeHexArg (StoreEntry->RI->In.RegX);
184 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, StoreEntry->LI);
187 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, StoreEntry->LI);
189 CS_InsertEntry (S, X, Store+3);
190 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLo, 0, StoreEntry->LI);
191 CS_InsertEntry (S, X, Store+4);
193 /* Remove the push and the call to the staspidx function */
194 CS_DelEntry (S, Store);
195 CS_DelEntry (S, Push);
197 /* We changed the sequence */
203 static unsigned Opt_tosaddax (CodeSeg* S, unsigned Push, unsigned Add,
204 const char* ZPLo, const char* ZPHi)
205 /* Optimize the tosaddax sequence if possible */
210 CodeEntry* PushEntry;
215 /* We need the entry behind the add */
216 CHECK ((N = CS_GetNextEntry (S, Add)) != 0);
218 /* And the entry before the push */
219 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
221 /* Get the push entry */
222 PushEntry = CS_GetEntry (S, Push);
224 /* Check the entry before the push, if it's a lda instruction with an
225 * addressing mode that does not use an additional index register. If
226 * so, we may use this location for the add and must not save the
227 * value in the zero page location.
229 DirectAdd = (P->OPC == OP65_LDA &&
230 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
232 /* Store the value into the zeropage instead of pushing it */
233 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
234 CS_InsertEntry (S, X, Push+1);
235 ++Add; /* Correct the index */
237 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
238 CS_InsertEntry (S, X, Push+1);
239 ++Add; /* Correct the index */
242 /* Get a pointer to the add entry */
243 AddEntry = CS_GetEntry (S, Add);
246 X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, AddEntry->LI);
247 CS_InsertEntry (S, X, Add+1);
249 /* Add from variable location */
250 X = NewCodeEntry (OP65_ADC, P->AM, P->Arg, 0, AddEntry->LI);
252 /* Add from temp storage */
253 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPLo, 0, AddEntry->LI);
255 CS_InsertEntry (S, X, Add+2);
256 if (PushEntry->RI->In.RegX == 0) {
257 /* The high byte is the value in X plus the carry */
258 CodeLabel* L = CS_GenLabel (S, N);
259 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
260 CS_InsertEntry (S, X, Add+3);
261 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
262 CS_InsertEntry (S, X, Add+4);
263 } else if (AddEntry->RI->In.RegX == 0) {
264 /* The high byte is that of the first operand plus carry */
266 if (PushEntry->RI->In.RegX >= 0) {
267 /* Value of first op high byte is known */
268 const char* Arg = MakeHexArg (PushEntry->RI->In.RegX);
269 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, AddEntry->LI);
271 /* Value of first op high byte is unknown */
272 X = NewCodeEntry (OP65_LDX, AM65_ZP, ZPHi, 0, AddEntry->LI);
274 CS_InsertEntry (S, X, Add+3);
275 L = CS_GenLabel (S, N);
276 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, AddEntry->LI);
277 CS_InsertEntry (S, X, Add+4);
278 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, AddEntry->LI);
279 CS_InsertEntry (S, X, Add+5);
281 /* High byte is unknown */
282 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AddEntry->LI);
283 CS_InsertEntry (S, X, Add+3);
284 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AddEntry->LI);
285 CS_InsertEntry (S, X, Add+4);
286 X = NewCodeEntry (OP65_ADC, AM65_ZP, ZPHi, 0, AddEntry->LI);
287 CS_InsertEntry (S, X, Add+5);
288 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AddEntry->LI);
289 CS_InsertEntry (S, X, Add+6);
290 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AddEntry->LI);
291 CS_InsertEntry (S, X, Add+7);
294 /* Remove the push and the call to the tosaddax function */
295 CS_DelEntry (S, Add);
296 CS_DelEntry (S, Push);
298 /* We changed the sequence */
304 static unsigned Opt_tosandax (CodeSeg* S, unsigned Push, unsigned And,
305 const char* ZPLo, const char* ZPHi)
306 /* Optimize the tosandax sequence if possible */
310 CodeEntry* PushEntry;
314 /* Get the entry before the push */
315 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
317 /* Get the push entry */
318 PushEntry = CS_GetEntry (S, Push);
320 /* Check the entry before the push, if it's a lda instruction with an
321 * addressing mode that does not use an additional index register. If
322 * so, we may use this location for the and and must not save the
323 * value in the zero page location.
325 DirectAnd = (P->OPC == OP65_LDA &&
326 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
328 /* Store the value into the zeropage instead of pushing it */
329 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
330 CS_InsertEntry (S, X, Push+1);
331 ++And; /* Correct the index */
333 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
334 CS_InsertEntry (S, X, Push+1);
335 ++And; /* Correct the index */
338 /* Get a pointer to the and entry */
339 AndEntry = CS_GetEntry (S, And);
343 /* And with variable location */
344 X = NewCodeEntry (OP65_AND, P->AM, P->Arg, 0, AndEntry->LI);
346 /* And with temp storage */
347 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPLo, 0, AndEntry->LI);
349 CS_InsertEntry (S, X, And+1);
350 if (PushEntry->RI->In.RegX == 0 || AndEntry->RI->In.RegX == 0) {
351 /* The high byte is zero */
352 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, AndEntry->LI);
353 CS_InsertEntry (S, X, And+2);
355 /* High byte is unknown */
356 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, AndEntry->LI);
357 CS_InsertEntry (S, X, And+2);
358 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, AndEntry->LI);
359 CS_InsertEntry (S, X, And+3);
360 X = NewCodeEntry (OP65_AND, AM65_ZP, ZPHi, 0, AndEntry->LI);
361 CS_InsertEntry (S, X, And+4);
362 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, AndEntry->LI);
363 CS_InsertEntry (S, X, And+5);
364 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, AndEntry->LI);
365 CS_InsertEntry (S, X, And+6);
368 /* Remove the push and the call to the tosandax function */
369 CS_DelEntry (S, And);
370 CS_DelEntry (S, Push);
372 /* We changed the sequence */
378 static unsigned Opt_tosorax (CodeSeg* S, unsigned Push, unsigned Or,
379 const char* ZPLo, const char* ZPHi)
380 /* Optimize the tosorax sequence if possible */
384 CodeEntry* PushEntry;
388 /* Get the entry before the push */
389 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
391 /* Get the push entry */
392 PushEntry = CS_GetEntry (S, Push);
394 /* Check the entry before the push, if it's a lda instruction with an
395 * addressing mode that does not use an additional index register. If
396 * so, we may use this location for the or and must not save the
397 * value in the zero page location.
399 DirectOr = (P->OPC == OP65_LDA &&
400 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
402 /* Store the value into the zeropage instead of pushing it */
403 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
404 CS_InsertEntry (S, X, Push+1);
405 ++Or; /* Correct the index */
407 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
408 CS_InsertEntry (S, X, Push+1);
409 ++Or; /* Correct the index */
412 /* Get a pointer to the or entry */
413 OrEntry = CS_GetEntry (S, Or);
417 /* Or with variable location */
418 X = NewCodeEntry (OP65_ORA, P->AM, P->Arg, 0, OrEntry->LI);
420 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPLo, 0, OrEntry->LI);
422 CS_InsertEntry (S, X, Or+1);
423 if (PushEntry->RI->In.RegX >= 0 && OrEntry->RI->In.RegX >= 0) {
424 /* Both values known, precalculate the result */
425 const char* Arg = MakeHexArg (PushEntry->RI->In.RegX | OrEntry->RI->In.RegX);
426 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, OrEntry->LI);
427 CS_InsertEntry (S, X, Or+2);
428 } else if (PushEntry->RI->In.RegX != 0) {
429 /* High byte is unknown */
430 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, OrEntry->LI);
431 CS_InsertEntry (S, X, Or+2);
432 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, OrEntry->LI);
433 CS_InsertEntry (S, X, Or+3);
434 X = NewCodeEntry (OP65_ORA, AM65_ZP, ZPHi, 0, OrEntry->LI);
435 CS_InsertEntry (S, X, Or+4);
436 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, OrEntry->LI);
437 CS_InsertEntry (S, X, Or+5);
438 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, OrEntry->LI);
439 CS_InsertEntry (S, X, Or+6);
442 /* Remove the push and the call to the tosandax function */
444 CS_DelEntry (S, Push);
446 /* We changed the sequence */
452 static unsigned Opt_tosxorax (CodeSeg* S, unsigned Push, unsigned Xor,
453 const char* ZPLo, const char* ZPHi)
454 /* Optimize the tosxorax sequence if possible */
458 CodeEntry* PushEntry;
462 /* Get the entry before the push */
463 CHECK ((P = CS_GetPrevEntry (S, Push)) != 0);
465 /* Get the push entry */
466 PushEntry = CS_GetEntry (S, Push);
468 /* Check the entry before the push, if it's a lda instruction with an
469 * addressing mode that does not use an additional index register. If
470 * so, we may use this location for the xor and must not save the
471 * value in the zero page location.
473 DirectXor = (P->OPC == OP65_LDA &&
474 (P->AM == AM65_IMM || P->AM == AM65_ZP || P->AM == AM65_ABS));
476 /* Store the value into the zeropage instead of pushing it */
477 X = NewCodeEntry (OP65_STX, AM65_ZP, ZPHi, 0, PushEntry->LI);
478 CS_InsertEntry (S, X, Push+1);
479 ++Xor; /* Correct the index */
481 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, PushEntry->LI);
482 CS_InsertEntry (S, X, Push+1);
483 ++Xor; /* Correct the index */
486 /* Get a pointer to the entry */
487 XorEntry = CS_GetEntry (S, Xor);
491 /* Xor with variable location */
492 X = NewCodeEntry (OP65_EOR, P->AM, P->Arg, 0, XorEntry->LI);
494 /* Xor with temp storage */
495 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPLo, 0, XorEntry->LI);
497 CS_InsertEntry (S, X, Xor+1);
498 if (PushEntry->RI->In.RegX >= 0 && XorEntry->RI->In.RegX >= 0) {
499 /* Both values known, precalculate the result */
500 const char* Arg = MakeHexArg (PushEntry->RI->In.RegX ^ XorEntry->RI->In.RegX);
501 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, XorEntry->LI);
502 CS_InsertEntry (S, X, Xor+2);
503 } else if (PushEntry->RI->In.RegX != 0) {
504 /* High byte is unknown */
505 X = NewCodeEntry (OP65_STA, AM65_ZP, ZPLo, 0, XorEntry->LI);
506 CS_InsertEntry (S, X, Xor+2);
507 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, XorEntry->LI);
508 CS_InsertEntry (S, X, Xor+3);
509 X = NewCodeEntry (OP65_EOR, AM65_ZP, ZPHi, 0, XorEntry->LI);
510 CS_InsertEntry (S, X, Xor+4);
511 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, XorEntry->LI);
512 CS_InsertEntry (S, X, Xor+5);
513 X = NewCodeEntry (OP65_LDA, AM65_ZP, ZPLo, 0, XorEntry->LI);
514 CS_InsertEntry (S, X, Xor+6);
517 /* Remove the push and the call to the tosandax function */
518 CS_DelEntry (S, Xor);
519 CS_DelEntry (S, Push);
521 /* We changed the sequence */
527 /*****************************************************************************/
529 /*****************************************************************************/
533 /* Flags for the functions */
535 STOP_NONE, /* Nothing special */
536 STOP_A_UNUSED /* Call only if a unused later */
540 typedef unsigned (*OptFunc) (CodeSeg* S, unsigned Push, unsigned Store,
541 const char* ZPLo, const char* ZPHi);
542 typedef struct OptFuncDesc OptFuncDesc;
544 const char* Name; /* Name of the replaced runtime function */
545 OptFunc Func; /* Function pointer */
546 STOP_FLAGS Flags; /* Flags */
549 static const OptFuncDesc FuncTable[] = {
550 { "staspidx", Opt_staspidx, STOP_NONE },
551 { "staxspidx", Opt_staxspidx, STOP_A_UNUSED },
552 { "tosaddax", Opt_tosaddax, STOP_NONE },
553 { "tosandax", Opt_tosandax, STOP_NONE },
554 { "tosorax", Opt_tosorax, STOP_NONE },
555 { "tosxorax", Opt_tosxorax, STOP_NONE },
557 #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0]))
561 static int CmpFunc (const void* Key, const void* Func)
562 /* Compare function for bsearch */
564 return strcmp (Key, ((const OptFuncDesc*) Func)->Name);
569 static const OptFuncDesc* FindFunc (const char* Name)
570 /* Find the function with the given name. Return a pointer to the table entry
571 * or NULL if the function was not found.
574 return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc);
579 /*****************************************************************************/
581 /*****************************************************************************/
585 unsigned OptStackOps (CodeSeg* S)
586 /* Optimize operations that take operands via the stack */
588 unsigned Changes = 0; /* Number of changes in one run */
589 int InSeq = 0; /* Inside a sequence */
590 unsigned Push = 0; /* Index of pushax */
591 unsigned UsedRegs = 0; /* Zeropage registers used in sequence */
594 /* Generate register info */
597 /* Look for a call to pushax followed by a call to some other function
598 * that takes it's first argument on the stack, and the second argument
599 * in the primary register.
600 * It depends on the code between the two if we can handle/transform the
601 * sequence, so check this code for the following list of things:
603 * - there must not be a jump or conditional branch (this may
604 * get relaxed later).
605 * - there may not be accesses to local variables with unknown
606 * offsets (because we have to adjust these offsets).
607 * - no subroutine calls
610 * Since we need a zero page register later, do also check the
611 * intermediate code for zero page use.
614 while (I < CS_GetEntryCount (S)) {
616 /* Get the next entry */
617 CodeEntry* E = CS_GetEntry (S, I);
619 /* Handling depends if we're inside a sequence or not */
622 if ((E->Info & OF_BRA) != 0 ||
623 ((E->Use & REG_SP) != 0 &&
624 (E->AM != AM65_ZP_INDY || E->RI->In.RegY < 0)) ||
627 /* All this stuff is not allowed in a sequence */
630 } else if (E->OPC == OP65_JSR) {
632 /* Subroutine call: Check if this is one of our functions */
633 const OptFuncDesc* F = FindFunc (E->Arg);
636 const char* ZPLo = 0;
637 const char* ZPHi = 0;
640 /* Check the flags */
641 if (F->Flags & STOP_A_UNUSED) {
642 /* a must be unused later */
643 if (RegAUsed (S, I+1)) {
644 /* Cannot optimize */
649 /* Determine the zero page locations to use */
651 UsedRegs |= GetRegInfo (S, I+1, REG_SREG | REG_PTR1 | REG_PTR2);
652 if ((UsedRegs & REG_SREG) == REG_NONE) {
653 /* SREG is available */
656 } else if ((UsedRegs & REG_PTR1) == REG_NONE) {
659 } else if ((UsedRegs & REG_PTR2) == REG_NONE) {
663 /* No registers available */
668 /* If preconditions are ok, call the optimizer function */
671 /* Adjust stack offsets */
672 unsigned Op = I + AdjustStackOffset (S, Push, I, 2);
674 /* Call the optimizer function */
675 Changes += F->Func (S, Push, Op, ZPLo, ZPHi);
677 /* Regenerate register info */
681 /* End of sequence */
684 } else if (strcmp (E->Arg, "pushax") == 0) {
685 /* Restart the sequence */
689 /* A call to an unkown subroutine ends the sequence */
695 /* Other stuff: Track zeropage register usage */
696 UsedRegs |= (E->Use | E->Chg);
700 } else if (CE_IsCall (E, "pushax")) {
702 /* This starts a sequence */
714 /* Free the register info */
717 /* Return the number of changes made */