1 /*****************************************************************************/
5 /* Optimize stores through pointers */
9 /* (C) 2012, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
46 #include "coptptrstore.h"
50 /*****************************************************************************/
51 /* Helper functions */
52 /*****************************************************************************/
56 static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L)
57 /* Check if this is one of the allowed suboperation for OptPtrStore1 */
59 /* Check for a label attached to the entry */
60 if (CE_HasLabel (L[0])) {
64 /* Check for single insn sub ops */
65 if (L[0]->OPC == OP65_AND ||
66 L[0]->OPC == OP65_EOR ||
67 L[0]->OPC == OP65_ORA ||
68 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) ||
69 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) {
74 } else if (L[0]->OPC == OP65_CLC &&
75 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
76 L[1]->OPC == OP65_ADC &&
77 !CE_HasLabel (L[1])) {
79 } else if (L[0]->OPC == OP65_SEC &&
80 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
81 L[1]->OPC == OP65_SBC &&
82 !CE_HasLabel (L[1])) {
94 static const char* LoadAXZP (CodeSeg* S, unsigned I)
95 /* If the two instructions preceeding S/I are a load of A/X from a two byte
96 * zero byte location, return the name of the zero page location. Otherwise
104 CS_GetEntries (S, L, I-2, 2) &&
105 L[0]->OPC == OP65_LDA &&
106 L[0]->AM == AM65_ZP &&
107 L[1]->OPC == OP65_LDX &&
108 L[1]->AM == AM65_ZP &&
109 !CE_HasLabel (L[1]) &&
110 (Len = strlen (L[0]->Arg)) == strlen (L[1]->Arg) - 2 &&
111 memcmp (L[0]->Arg, L[1]->Arg, Len) == 0 &&
112 L[1]->Arg[Len] == '+' &&
113 L[1]->Arg[Len+1] == '1') {
115 /* Return the label */
128 static const char* LoadAXImm (CodeSeg* S, unsigned I)
129 /* If the instructions preceeding S/I are a load of A/X of a constant value
130 * or a word sized address label, return the address of the location as a
132 * Beware: In case of a numeric value, the result is returned in static
133 * storage which is overwritten with each call.
136 static StrBuf Buf = STATIC_STRBUF_INITIALIZER;
142 /* Fetch entry at I and check if A/X is known */
143 L[0] = CS_GetEntry (S, I);
145 RegValIsKnown (L[0]->RI->In.RegA) &&
146 RegValIsKnown (L[0]->RI->In.RegX)) {
148 /* Numeric argument - get low and high byte */
149 unsigned Lo = (L[0]->RI->In.RegA & 0xFF);
150 unsigned Hi = (L[0]->RI->In.RegX & 0xFF);
152 /* Format into buffer */
153 SB_Printf (&Buf, "$%04X", Lo | (Hi << 8));
155 /* Return the address as a string */
156 return SB_GetConstBuf (&Buf);
160 /* Search back for the two instructions loading A and X. Abort
161 * the search if the registers are changed in any other way or
162 * if a label is reached while we don't have both loads.
168 CodeEntry* E = CS_GetEntry (S, I);
170 /* Check for the loads of A and X */
171 if (ALoad == 0 && E->OPC == OP65_LDA && E->AM == AM65_IMM) {
173 } else if (E->Chg & REG_A) {
174 /* A is changed before we get the load */
176 } else if (XLoad == 0 && E->OPC == OP65_LDX && E->AM == AM65_IMM) {
178 } else if (E->Chg & REG_X) {
179 /* X is changed before we get the load */
183 if (ALoad != 0 && XLoad != 0) {
188 /* If we have a label, before both are found, bail out */
189 if (CE_HasLabel (E)) {
194 /* Check for a load of a label address */
195 if ((Len = strlen (ALoad->Arg)) > 3 &&
196 ALoad->Arg[0] == '<' &&
197 ALoad->Arg[1] == '(' &&
198 strlen (XLoad->Arg) == Len &&
199 XLoad->Arg[0] == '>' &&
200 memcmp (ALoad->Arg+1, XLoad->Arg+1, Len-1) == 0) {
202 /* Load of an address label */
203 SB_CopyBuf (&Buf, ALoad->Arg + 2, Len - 3);
205 return SB_GetConstBuf (&Buf);
214 /*****************************************************************************/
216 /*****************************************************************************/
220 unsigned OptPtrStore1 (CodeSeg* S)
221 /* Search for the sequence:
263 * depending on the code preceeding the sequence above.
266 unsigned Changes = 0;
269 /* Walk over the entries */
271 while (I < CS_GetEntryCount (S)) {
276 L[0] = CS_GetEntry (S, I);
278 /* Check for the sequence */
279 if (L[0]->OPC == OP65_CLC &&
280 CS_GetEntries (S, L+1, I+1, 8) &&
281 L[1]->OPC == OP65_ADC &&
282 (L[1]->AM == AM65_ABS ||
283 L[1]->AM == AM65_ZP ||
284 L[1]->AM == AM65_IMM ||
285 (L[1]->AM == AM65_ZP_INDY &&
286 RegValIsKnown (L[1]->RI->In.RegY))) &&
287 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
289 L[2]->JumpTo->Owner == L[4] &&
290 L[3]->OPC == OP65_INX &&
291 CE_IsCallTo (L[4], "pushax") &&
292 L[5]->OPC == OP65_LDX &&
293 L[6]->OPC == OP65_LDA &&
294 L[7]->OPC == OP65_LDY &&
295 CE_IsKnownImm (L[7], 0) &&
296 CE_IsCallTo (L[8], "staspidx") &&
297 !CS_RangeHasLabel (S, I+1, 3) &&
298 !CS_RangeHasLabel (S, I+5, 4)) {
304 /* Track the insertion point */
306 if ((Loc = LoadAXZP (S, I)) != 0) {
307 /* If the sequence is preceeded by a load of a ZP value,
308 * we can use this ZP value as a pointer using ZP
309 * indirect Y addressing.
312 } else if ((Loc = LoadAXImm (S, I)) != 0) {
313 /* If the sequence is preceeded by a load of an immediate
314 * value, we can use this absolute value as an address
315 * using absolute indexed Y addressing.
320 /* If we don't have a store location, we use ptr1 with zp
321 * indirect Y addressing. We must store the value in A/X into
330 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[8]->LI);
331 CS_InsertEntry (S, X, IP++);
333 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[8]->LI);
334 CS_InsertEntry (S, X, IP++);
338 /* If the index is loaded from (zp),y, we cannot do that directly.
339 * Note: In this case, the Y register will contain the correct
340 * value after removing the old code, so we don't need to load
343 if (L[1]->AM == AM65_ZP_INDY) {
344 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
345 CS_InsertEntry (S, X, IP++);
347 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[1]->LI);
348 CS_InsertEntry (S, X, IP++);
350 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
351 CS_InsertEntry (S, X, IP++);
354 X = NewCodeEntry (OP65_LDX, L[5]->AM, L[5]->Arg, 0, L[5]->LI);
355 CS_InsertEntry (S, X, IP++);
357 X = NewCodeEntry (OP65_LDA, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
358 CS_InsertEntry (S, X, IP++);
360 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[8]->LI);
361 CS_InsertEntry (S, X, IP++);
363 /* Remove the old code */
364 CS_DelEntries (S, I, 9);
366 /* Skip most of the generated replacement code */
369 /* Remember, we had changes */
379 /* Return the number of changes made */
385 unsigned OptPtrStore2 (CodeSeg* S)
386 /* Search for the sequence:
433 * depending on the code preceeding the sequence above.
436 unsigned Changes = 0;
439 /* Walk over the entries */
441 while (I < CS_GetEntryCount (S)) {
446 L[0] = CS_GetEntry (S, I);
448 /* Check for the sequence */
449 if (L[0]->OPC == OP65_CLC &&
450 CS_GetEntries (S, L+1, I+1, 9) &&
451 L[1]->OPC == OP65_ADC &&
452 (L[1]->AM == AM65_ABS ||
453 L[1]->AM == AM65_ZP ||
454 L[1]->AM == AM65_IMM ||
455 (L[1]->AM == AM65_ZP_INDY &&
456 RegValIsKnown (L[1]->RI->In.RegY))) &&
457 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
459 L[2]->JumpTo->Owner == L[4] &&
460 L[3]->OPC == OP65_INX &&
461 CE_IsCallTo (L[4], "pushax") &&
462 L[5]->OPC == OP65_LDY &&
463 CE_IsConstImm (L[5]) &&
464 L[6]->OPC == OP65_LDX &&
465 L[7]->OPC == OP65_LDA &&
466 L[7]->AM == AM65_ZP_INDY &&
467 strcmp (L[7]->Arg, "sp") == 0 &&
468 L[8]->OPC == OP65_LDY &&
469 (L[8]->AM == AM65_ABS ||
470 L[8]->AM == AM65_ZP ||
471 L[8]->AM == AM65_IMM) &&
472 CE_IsCallTo (L[9], "staspidx") &&
473 !CS_RangeHasLabel (S, I+1, 3) &&
474 !CS_RangeHasLabel (S, I+5, 5)) {
481 /* Track the insertion point */
482 unsigned IP = I + 10;
483 if ((Loc = LoadAXZP (S, I)) != 0) {
484 /* If the sequence is preceeded by a load of a ZP value,
485 * we can use this ZP value as a pointer using ZP
486 * indirect Y addressing.
489 } else if ((Loc = LoadAXImm (S, I)) != 0) {
490 /* If the sequence is preceeded by a load of an immediate
491 * value, we can use this absolute value as an address
492 * using absolute indexed Y addressing.
497 /* If we don't have a store location, we use ptr1 with zp
498 * indirect Y addressing. We must store the value in A/X into
507 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[8]->LI);
508 CS_InsertEntry (S, X, IP++);
510 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[8]->LI);
511 CS_InsertEntry (S, X, IP++);
515 /* Generate four different replacements depending on the addressing
516 * mode of the store and from where the index is loaded:
518 * 1. If the index is not loaded ZP indirect Y, we can use Y for
521 * 2. If the index is loaded ZP indirect Y and we store absolute
522 * indexed, we need Y to load the index and will therefore
523 * use X as index for the store. The disadvantage is that we
524 * need to reload X later.
526 * 3. If the index is loaded ZP indirect Y and we store ZP indirect
527 * Y, we must use Y for load and store and must therefore save
528 * the A register when loading Y the second time.
530 if (L[1]->AM != AM65_ZP_INDY) {
533 Arg = MakeHexArg (L[5]->Num - 2);
534 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
535 CS_InsertEntry (S, X, IP++);
537 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
538 CS_InsertEntry (S, X, IP++);
540 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
541 CS_InsertEntry (S, X, IP++);
543 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
544 CS_InsertEntry (S, X, IP++);
546 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[9]->LI);
547 CS_InsertEntry (S, X, IP++);
549 } else if (AM == AM65_ABSY) {
552 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
553 CS_InsertEntry (S, X, IP++);
555 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[1]->LI);
556 CS_InsertEntry (S, X, IP++);
558 Arg = MakeHexArg (L[5]->Num - 2);
559 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
560 CS_InsertEntry (S, X, IP++);
562 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
563 CS_InsertEntry (S, X, IP++);
565 X = NewCodeEntry (OP65_STA, AM65_ABSX, Loc, 0, L[9]->LI);
566 CS_InsertEntry (S, X, IP++);
568 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
569 CS_InsertEntry (S, X, IP++);
574 Arg = MakeHexArg (L[5]->Num - 2);
575 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
576 CS_InsertEntry (S, X, IP++);
578 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
579 CS_InsertEntry (S, X, IP++);
581 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
582 CS_InsertEntry (S, X, IP++);
584 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, L[6]->LI);
585 CS_InsertEntry (S, X, IP++);
587 Arg = MakeHexArg (L[1]->RI->In.RegY);
588 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI);
589 CS_InsertEntry (S, X, IP++);
591 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
592 CS_InsertEntry (S, X, IP++);
594 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[1]->LI);
595 CS_InsertEntry (S, X, IP++);
597 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, L[6]->LI);
598 CS_InsertEntry (S, X, IP++);
600 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[9]->LI);
601 CS_InsertEntry (S, X, IP++);
605 /* Remove the old code */
606 CS_DelEntries (S, I, 10);
608 /* Skip most of the generated replacement code */
611 /* Remember, we had changes */
621 /* Return the number of changes made */
627 unsigned OptPtrStore3 (CodeSeg* S)
628 /* Search for the sequence:
648 * In case a/x is loaded from the register bank before the pushax, we can even
649 * use the register bank instead of ptr1.
653 unsigned Changes = 0;
655 /* Walk over the entries */
657 while (I < CS_GetEntryCount (S)) {
663 L[0] = CS_GetEntry (S, I);
665 /* Check for the sequence */
666 if (CE_IsCallTo (L[0], "pushax") &&
667 CS_GetEntries (S, L+1, I+1, 3) &&
668 L[1]->OPC == OP65_LDY &&
669 CE_IsConstImm (L[1]) &&
670 !CE_HasLabel (L[1]) &&
671 CE_IsCallTo (L[2], "ldauidx") &&
672 !CE_HasLabel (L[2]) &&
673 (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 &&
674 CS_GetEntries (S, L+3+K, I+3+K, 2) &&
675 L[3+K]->OPC == OP65_LDY &&
676 CE_IsConstImm (L[3+K]) &&
677 !CE_HasLabel (L[3+K]) &&
678 CE_IsCallTo (L[4+K], "staspidx") &&
679 !CE_HasLabel (L[4+K])) {
682 const char* RegBank = 0;
683 const char* ZPLoc = "ptr1";
687 /* Get the preceeding two instructions and check them. We check
694 P[0] = CS_GetEntry (S, I-2);
695 P[1] = CS_GetEntry (S, I-1);
696 if (P[0]->OPC == OP65_LDA &&
697 P[0]->AM == AM65_ZP &&
698 P[1]->OPC == OP65_LDX &&
699 P[1]->AM == AM65_ZP &&
700 !CE_HasLabel (P[1]) &&
701 strncmp (P[0]->Arg, "regbank+", 8) == 0) {
703 unsigned Len = strlen (P[0]->Arg);
705 if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 &&
706 P[1]->Arg[Len+0] == '+' &&
707 P[1]->Arg[Len+1] == '1' &&
708 P[1]->Arg[Len+2] == '\0') {
710 /* Ok, found. Use the name of the register bank */
711 RegBank = ZPLoc = P[0]->Arg;
716 /* Insert the load via the zp pointer */
717 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI);
718 CS_InsertEntry (S, X, I+3);
719 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI);
720 CS_InsertEntry (S, X, I+4);
722 /* Insert the store through the zp pointer */
723 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI);
724 CS_InsertEntry (S, X, I+6+K);
726 /* Delete the old code */
727 CS_DelEntry (S, I+7+K); /* jsr spaspidx */
728 CS_DelEntry (S, I+2); /* jsr ldauidx */
730 /* Create and insert the stores into the zp pointer if needed */
732 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
733 CS_InsertEntry (S, X, I+1);
734 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
735 CS_InsertEntry (S, X, I+2);
738 /* Delete more old code. Do it here to keep a label attached to
741 CS_DelEntry (S, I); /* jsr pushax */
743 /* Remember, we had changes */
753 /* Return the number of changes made */