1 /*****************************************************************************/
5 /* Optimize stores through pointers */
9 /* (C) 2012, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
47 #include "coptptrstore.h"
51 /*****************************************************************************/
52 /* Helper functions */
53 /*****************************************************************************/
57 static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L)
58 /* Check if this is one of the allowed suboperations for OptPtrStore1 */
/* L[0] is the entry to check. For the two instruction forms (clc/adc and
 * sec/sbc) the entry obtained with CS_GetNextEntry (S, I) is stored into
 * L[1] and must not carry a label.
 * NOTE(review): the return statements are not visible here; the caller in
 * OptPtrStore3 tests the result with "> 0" and uses it as an entry count.
 * TODO confirm.
 */
60 /* Check for a label attached to the entry */
61 if (CE_HasLabel (L[0])) {
65 /* Check for single insn sub ops: and/eor/ora, or a jsr to shlaxN/shraxN
 * where N is a single digit.
 */
66 if (L[0]->OPC == OP65_AND ||
67 L[0]->OPC == OP65_EOR ||
68 L[0]->OPC == OP65_ORA ||
69 (L[0]->OPC == OP65_JSR &&
70 (strncmp (L[0]->Arg, "shlax", 5) == 0 ||
71 strncmp (L[0]->Arg, "shrax", 5) == 0) &&
72 strlen (L[0]->Arg) == 6 &&
73 IsDigit (L[0]->Arg[5]))) {
78 } else if (L[0]->OPC == OP65_CLC &&
79 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
80 L[1]->OPC == OP65_ADC &&
81 !CE_HasLabel (L[1])) {
83 } else if (L[0]->OPC == OP65_SEC &&
84 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
85 L[1]->OPC == OP65_SBC &&
86 !CE_HasLabel (L[1])) {
98 static const char* LoadAXZP (CodeSeg* S, unsigned I)
99 /* If the two instructions preceding S/I are a load of A/X from a two byte
100 * zero page location, return the name of the zero page location. Otherwise
108 CS_GetEntries (S, L, I-2, 2) &&
109 L[0]->OPC == OP65_LDA &&
110 L[0]->AM == AM65_ZP &&
111 L[1]->OPC == OP65_LDX &&
112 L[1]->AM == AM65_ZP &&
113 !CE_HasLabel (L[1]) &&
114 (Len = strlen (L[0]->Arg)) == strlen (L[1]->Arg) - 2 &&
115 memcmp (L[0]->Arg, L[1]->Arg, Len) == 0 &&
116 L[1]->Arg[Len] == '+' &&
117 L[1]->Arg[Len+1] == '1') {
119 /* Return the label */
132 static const char* LoadAXImm (CodeSeg* S, unsigned I)
133 /* If the instructions preceding S/I are a load of A/X of a constant value
134 * or a word sized address label, return the address of the location as a
136 * Beware: In case of a numeric value, the result is returned in static
137 * storage which is overwritten with each call.
140 static StrBuf Buf = STATIC_STRBUF_INITIALIZER;
146 /* Fetch entry at I and check if A/X is known */
147 L[0] = CS_GetEntry (S, I);
149 RegValIsKnown (L[0]->RI->In.RegA) &&
150 RegValIsKnown (L[0]->RI->In.RegX)) {
152 /* Numeric argument - get low and high byte */
153 unsigned Lo = (L[0]->RI->In.RegA & 0xFF);
154 unsigned Hi = (L[0]->RI->In.RegX & 0xFF);
156 /* Format as "$" plus four hex digits into the static buffer */
157 SB_Printf (&Buf, "$%04X", Lo | (Hi << 8));
159 /* Return the address as a string */
160 return SB_GetConstBuf (&Buf);
164 /* Search back for the two instructions loading A and X. Abort
165 * the search if the registers are changed in any other way or
166 * if a label is reached while we don't have both loads.
172 CodeEntry* E = CS_GetEntry (S, I);
174 /* Check for the loads of A and X */
175 if (ALoad == 0 && E->OPC == OP65_LDA && E->AM == AM65_IMM) {
177 } else if (E->Chg & REG_A) {
178 /* A is changed before we get the load */
180 } else if (XLoad == 0 && E->OPC == OP65_LDX && E->AM == AM65_IMM) {
182 } else if (E->Chg & REG_X) {
183 /* X is changed before we get the load */
187 if (ALoad != 0 && XLoad != 0) {
192 /* If we reach a label before both loads are found, bail out */
193 if (CE_HasLabel (E)) {
198 /* Check for a load of a label address: A is loaded with "<(...", X with
 * ">..." and both arguments are identical after the first character.
 */
199 if ((Len = strlen (ALoad->Arg)) > 3 &&
200 ALoad->Arg[0] == '<' &&
201 ALoad->Arg[1] == '(' &&
202 strlen (XLoad->Arg) == Len &&
203 XLoad->Arg[0] == '>' &&
204 memcmp (ALoad->Arg+1, XLoad->Arg+1, Len-1) == 0) {
206 /* Load of an address label: copy the argument without the leading "<("
 * and without its last character.
 */
207 SB_CopyBuf (&Buf, ALoad->Arg + 2, Len - 3);
209 return SB_GetConstBuf (&Buf);
218 /*****************************************************************************/
220 /*****************************************************************************/
224 unsigned OptPtrStore1 (CodeSeg* S)
225 /* Search for the sequence:
267 * depending on the code preceding the sequence above.
270 unsigned Changes = 0;
273 /* Walk over the entries */
275 while (I < CS_GetEntryCount (S)) {
280 L[0] = CS_GetEntry (S, I);
282 /* Check for the sequence */
/* The nine entries L[0]..L[8] must be: clc / adc (abs, zp, imm, or (zp),y
 * with a known Y value) / bcc or jcc whose jump target is owned by L[4] /
 * inx / jsr pushax / ldx / lda / ldy accepted by CE_IsKnownImm (L[7], 0) /
 * jsr staspidx. CS_RangeHasLabel is checked for I+1 and for I+5.
 */
283 if (L[0]->OPC == OP65_CLC &&
284 CS_GetEntries (S, L+1, I+1, 8) &&
285 L[1]->OPC == OP65_ADC &&
286 (L[1]->AM == AM65_ABS ||
287 L[1]->AM == AM65_ZP ||
288 L[1]->AM == AM65_IMM ||
289 (L[1]->AM == AM65_ZP_INDY &&
290 RegValIsKnown (L[1]->RI->In.RegY))) &&
291 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
293 L[2]->JumpTo->Owner == L[4] &&
294 L[3]->OPC == OP65_INX &&
295 CE_IsCallTo (L[4], "pushax") &&
296 L[5]->OPC == OP65_LDX &&
297 L[6]->OPC == OP65_LDA &&
298 L[7]->OPC == OP65_LDY &&
299 CE_IsKnownImm (L[7], 0) &&
300 CE_IsCallTo (L[8], "staspidx") &&
301 !CS_RangeHasLabel (S, I+1, 3) &&
302 !CS_RangeHasLabel (S, I+5, 4)) {
308 /* Track the insertion point */
310 if ((Loc = LoadAXZP (S, I)) != 0) {
311 /* If the sequence is preceded by a load of a ZP value,
312 * we can use this ZP value as a pointer using ZP
313 * indirect Y addressing.
316 } else if ((Loc = LoadAXImm (S, I)) != 0) {
317 /* If the sequence is preceded by a load of an immediate
318 * value, we can use this absolute value as an address
319 * using absolute indexed Y addressing.
324 /* If we don't have a store location, we use ptr1 with zp
325 * indirect Y addressing. We must store the value in A/X into
334 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[8]->LI);
335 CS_InsertEntry (S, X, IP++);
337 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[8]->LI);
338 CS_InsertEntry (S, X, IP++);
342 /* If the index is loaded from (zp),y, we cannot do that directly.
343 * Note: In this case, the Y register will contain the correct
344 * value after removing the old code, so we don't need to load
347 if (L[1]->AM == AM65_ZP_INDY) {
348 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
349 CS_InsertEntry (S, X, IP++);
351 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[1]->LI);
352 CS_InsertEntry (S, X, IP++);
354 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
355 CS_InsertEntry (S, X, IP++);
358 X = NewCodeEntry (OP65_LDX, L[5]->AM, L[5]->Arg, 0, L[5]->LI);
359 CS_InsertEntry (S, X, IP++);
361 X = NewCodeEntry (OP65_LDA, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
362 CS_InsertEntry (S, X, IP++);
364 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[8]->LI);
365 CS_InsertEntry (S, X, IP++);
367 /* Remove the old code */
368 CS_DelEntries (S, I, 9);
370 /* Skip most of the generated replacement code */
373 /* Remember, we had changes */
383 /* Return the number of changes made */
389 unsigned OptPtrStore2 (CodeSeg* S)
390 /* Search for the sequence:
437 * depending on the code preceding the sequence above.
440 unsigned Changes = 0;
443 /* Walk over the entries */
445 while (I < CS_GetEntryCount (S)) {
450 L[0] = CS_GetEntry (S, I);
452 /* Check for the sequence */
/* The ten entries L[0]..L[9] must be: clc / adc (abs, zp, imm, or (zp),y
 * with a known Y value) / bcc or jcc whose jump target is owned by L[4] /
 * inx / jsr pushax / ldy with a constant immediate / ldx / lda (sp),y /
 * ldy (abs, zp or imm) / jsr staspidx.
 */
453 if (L[0]->OPC == OP65_CLC &&
454 CS_GetEntries (S, L+1, I+1, 9) &&
455 L[1]->OPC == OP65_ADC &&
456 (L[1]->AM == AM65_ABS ||
457 L[1]->AM == AM65_ZP ||
458 L[1]->AM == AM65_IMM ||
459 (L[1]->AM == AM65_ZP_INDY &&
460 RegValIsKnown (L[1]->RI->In.RegY))) &&
461 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
463 L[2]->JumpTo->Owner == L[4] &&
464 L[3]->OPC == OP65_INX &&
465 CE_IsCallTo (L[4], "pushax") &&
466 L[5]->OPC == OP65_LDY &&
467 CE_IsConstImm (L[5]) &&
468 L[6]->OPC == OP65_LDX &&
469 L[7]->OPC == OP65_LDA &&
470 L[7]->AM == AM65_ZP_INDY &&
471 strcmp (L[7]->Arg, "sp") == 0 &&
472 L[8]->OPC == OP65_LDY &&
473 (L[8]->AM == AM65_ABS ||
474 L[8]->AM == AM65_ZP ||
475 L[8]->AM == AM65_IMM) &&
476 CE_IsCallTo (L[9], "staspidx") &&
477 !CS_RangeHasLabel (S, I+1, 3) &&
478 !CS_RangeHasLabel (S, I+5, 5)) {
485 /* Track the insertion point */
486 unsigned IP = I + 10;
487 if ((Loc = LoadAXZP (S, I)) != 0) {
488 /* If the sequence is preceded by a load of a ZP value,
489 * we can use this ZP value as a pointer using ZP
490 * indirect Y addressing.
493 } else if ((Loc = LoadAXImm (S, I)) != 0) {
494 /* If the sequence is preceded by a load of an immediate
495 * value, we can use this absolute value as an address
496 * using absolute indexed Y addressing.
501 /* If we don't have a store location, we use ptr1 with zp
502 * indirect Y addressing. We must store the value in A/X into
511 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[8]->LI);
512 CS_InsertEntry (S, X, IP++);
514 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[8]->LI);
515 CS_InsertEntry (S, X, IP++);
519 /* Generate three different replacements depending on the addressing
520 * mode of the store and from where the index is loaded:
522 * 1. If the index is not loaded ZP indirect Y, we can use Y for
525 * 2. If the index is loaded ZP indirect Y and we store absolute
526 * indexed, we need Y to load the index and will therefore
527 * use X as index for the store. The disadvantage is that we
528 * need to reload X later.
530 * 3. If the index is loaded ZP indirect Y and we store ZP indirect
531 * Y, we must use Y for load and store and must therefore save
532 * the A register when loading Y the second time.
534 if (L[1]->AM != AM65_ZP_INDY) {
537 Arg = MakeHexArg (L[5]->Num - 2);
538 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
539 CS_InsertEntry (S, X, IP++);
541 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
542 CS_InsertEntry (S, X, IP++);
544 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
545 CS_InsertEntry (S, X, IP++);
547 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
548 CS_InsertEntry (S, X, IP++);
550 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[9]->LI);
551 CS_InsertEntry (S, X, IP++);
553 } else if (AM == AM65_ABSY) {
556 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
557 CS_InsertEntry (S, X, IP++);
559 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[1]->LI);
560 CS_InsertEntry (S, X, IP++);
562 Arg = MakeHexArg (L[5]->Num - 2);
563 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
564 CS_InsertEntry (S, X, IP++);
566 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
567 CS_InsertEntry (S, X, IP++);
569 X = NewCodeEntry (OP65_STA, AM65_ABSX, Loc, 0, L[9]->LI);
570 CS_InsertEntry (S, X, IP++);
572 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
573 CS_InsertEntry (S, X, IP++);
578 Arg = MakeHexArg (L[5]->Num - 2);
579 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
580 CS_InsertEntry (S, X, IP++);
582 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
583 CS_InsertEntry (S, X, IP++);
585 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
586 CS_InsertEntry (S, X, IP++);
588 X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, L[6]->LI);
589 CS_InsertEntry (S, X, IP++);
591 Arg = MakeHexArg (L[1]->RI->In.RegY);
592 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI);
593 CS_InsertEntry (S, X, IP++);
595 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
596 CS_InsertEntry (S, X, IP++);
598 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[1]->LI);
599 CS_InsertEntry (S, X, IP++);
601 X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, L[6]->LI);
602 CS_InsertEntry (S, X, IP++);
604 X = NewCodeEntry (OP65_STA, AM, Loc, 0, L[9]->LI);
605 CS_InsertEntry (S, X, IP++);
609 /* Remove the old code */
610 CS_DelEntries (S, I, 10);
612 /* Skip most of the generated replacement code */
615 /* Remember, we had changes */
625 /* Return the number of changes made */
631 unsigned OptPtrStore3 (CodeSeg* S)
632 /* Search for the sequence:
652 * In case a/x is loaded from the register bank before the pushax, we can even
653 * use the register bank instead of ptr1.
657 unsigned Changes = 0;
659 /* Walk over the entries */
661 while (I < CS_GetEntryCount (S)) {
667 L[0] = CS_GetEntry (S, I);
669 /* Check for the sequence */
/* Expected: jsr pushax / ldy with a constant immediate / jsr ldauidx /
 * a sub operation accepted by OptPtrStore1Sub (its result K is used as
 * the number of entries to skip) / ldy with a constant immediate /
 * jsr staspidx. L[1], L[2], L[3+K] and L[4+K] must not carry a label.
 */
670 if (CE_IsCallTo (L[0], "pushax") &&
671 CS_GetEntries (S, L+1, I+1, 3) &&
672 L[1]->OPC == OP65_LDY &&
673 CE_IsConstImm (L[1]) &&
674 !CE_HasLabel (L[1]) &&
675 CE_IsCallTo (L[2], "ldauidx") &&
676 !CE_HasLabel (L[2]) &&
677 (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 &&
678 CS_GetEntries (S, L+3+K, I+3+K, 2) &&
679 L[3+K]->OPC == OP65_LDY &&
680 CE_IsConstImm (L[3+K]) &&
681 !CE_HasLabel (L[3+K]) &&
682 CE_IsCallTo (L[4+K], "staspidx") &&
683 !CE_HasLabel (L[4+K])) {
686 const char* RegBank = 0;
687 const char* ZPLoc = "ptr1";
691 /* Get the preceding two instructions and check them. We check
698 P[0] = CS_GetEntry (S, I-2);
699 P[1] = CS_GetEntry (S, I-1);
700 if (P[0]->OPC == OP65_LDA &&
701 P[0]->AM == AM65_ZP &&
702 P[1]->OPC == OP65_LDX &&
703 P[1]->AM == AM65_ZP &&
704 !CE_HasLabel (P[1]) &&
705 strncmp (P[0]->Arg, "regbank+", 8) == 0) {
707 unsigned Len = strlen (P[0]->Arg);
709 if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 &&
710 P[1]->Arg[Len+0] == '+' &&
711 P[1]->Arg[Len+1] == '1' &&
712 P[1]->Arg[Len+2] == '\0') {
714 /* Ok, found. Use the name of the register bank */
715 RegBank = ZPLoc = P[0]->Arg;
720 /* Insert the load via the zp pointer */
721 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI);
722 CS_InsertEntry (S, X, I+3);
723 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI);
724 CS_InsertEntry (S, X, I+4);
/* NOTE(review): the indices below rely on CS_InsertEntry shifting later
 * entries up by one: after the two inserts above, the old jsr staspidx
 * would be at I+6+K, and at I+7+K once the store is inserted in front
 * of it. TODO confirm.
 */
726 /* Insert the store through the zp pointer */
727 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI);
728 CS_InsertEntry (S, X, I+6+K);
730 /* Delete the old code */
731 CS_DelEntry (S, I+7+K); /* jsr staspidx */
732 CS_DelEntry (S, I+2); /* jsr ldauidx */
734 /* Create and insert the stores into the zp pointer if needed */
736 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
737 CS_InsertEntry (S, X, I+1);
738 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
739 CS_InsertEntry (S, X, I+2);
742 /* Delete more old code. Do it here to keep a label attached to
745 CS_DelEntry (S, I); /* jsr pushax */
747 /* Remember, we had changes */
757 /* Return the number of changes made */