1 /*****************************************************************************/
5 /* Optimize stores through pointers */
9 /* (C) 2012, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
44 #include "coptptrstore.h"
48 /*****************************************************************************/
49 /* Helper functions */
50 /*****************************************************************************/
54 static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L)
55 /* Check if this is one of the allowed suboperations for OptPtrStore1 */
/* L[0] must already hold the entry to inspect when this function is called.
 * For the two instruction forms the following entry is fetched with
 * CS_GetNextEntry (S, I) and stored into L[1].
 * NOTE(review): the return statements are not visible in this excerpt. The
 * caller OptPtrStore6 uses the result as the number of entries occupied by
 * the sub operation and treats zero as "no match" - confirm against the
 * complete source.
 */
57 /* Check for a label attached to the entry */
58 if (CE_HasLabel (L[0])) {
62 /* Check for single insn sub ops */
/* The subroutine names are compared by prefix only (first five characters),
 * so every callee whose name starts with shlax or shrax is accepted.
 */
63 if (L[0]->OPC == OP65_AND ||
64 L[0]->OPC == OP65_EOR ||
65 L[0]->OPC == OP65_ORA ||
66 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) ||
67 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) {
/* Two insn sub op: clc followed by an adc that carries no label */
72 } else if (L[0]->OPC == OP65_CLC &&
73 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
74 L[1]->OPC == OP65_ADC &&
75 !CE_HasLabel (L[1])) {
/* Two insn sub op: sec followed by an sbc that carries no label */
77 } else if (L[0]->OPC == OP65_SEC &&
78 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
79 L[1]->OPC == OP65_SBC &&
80 !CE_HasLabel (L[1])) {
92 static const char* ZPLoadAX (CodeSeg* S, unsigned I)
93 /* If the two instructions at S/I are a load of A/X from a two byte zero page
94 * location, return the name of the zero page location. Otherwise return NULL.
 * A match needs an "lda" with zero page addressing followed by an unlabeled
 * "ldx" with zero page addressing whose argument is the argument of the
 * "lda" with "+1" appended.
 * NOTE(review): the length comparison below subtracts 2 from the unsigned
 * result of strlen(), so an "ldx" argument shorter than two characters wraps
 * around instead of becoming negative. Writing the test as
 * strlen (L[1]->Arg) == Len + 2 would not rely on that wraparound.
100 if (CS_GetEntries (S, L, I, 2) &&
101 L[0]->OPC == OP65_LDA &&
102 L[0]->AM == AM65_ZP &&
103 L[1]->OPC == OP65_LDX &&
104 L[1]->AM == AM65_ZP &&
105 !CE_HasLabel (L[1]) &&
106 (Len = strlen (L[0]->Arg)) == strlen (L[1]->Arg) - 2 &&
107 memcmp (L[0]->Arg, L[1]->Arg, Len) == 0 &&
108 L[1]->Arg[Len] == '+' &&
109 L[1]->Arg[Len+1] == '1') {
111 /* Return the label */
124 /*****************************************************************************/
126 /*****************************************************************************/
130 unsigned OptPtrStore1 (CodeSeg* S)
131 /* Search for the sequence:
 *
 *      lda     #<(addr)
 *      ldx     #>(addr)
 *      clc
 *      adc     xxx             (absolute or zero page)
 *      bcc     L               (or jcc)
 *      inx
 * L:   jsr     pushax
 *      ldx     zzz
 *      lda     yyy
 *      ldy     #$00
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      ldy     xxx
 *      ldx     zzz
 *      lda     yyy
 *      sta     addr,y
 *
153 unsigned Changes = 0;
155 /* Walk over the entries */
157 while (I < CS_GetEntryCount (S)) {
163 L[0] = CS_GetEntry (S, I);
165 /* Check for the sequence */
166 if (L[0]->OPC == OP65_LDA &&
167 L[0]->AM == AM65_IMM &&
168 CS_GetEntries (S, L+1, I+1, 10) &&
169 L[1]->OPC == OP65_LDX &&
170 L[1]->AM == AM65_IMM &&
171 L[2]->OPC == OP65_CLC &&
172 L[3]->OPC == OP65_ADC &&
173 (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) &&
174 (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) &&
/* The branch has to target the pushax call, so it skips only the inx */
176 L[4]->JumpTo->Owner == L[6] &&
177 L[5]->OPC == OP65_INX &&
178 CE_IsCallTo (L[6], "pushax") &&
179 L[7]->OPC == OP65_LDX &&
180 L[8]->OPC == OP65_LDA &&
181 L[9]->OPC == OP65_LDY &&
182 CE_IsKnownImm (L[9], 0) &&
183 CE_IsCallTo (L[10], "staspidx") &&
/* No labels on L[1]..L[5] and L[7]..L[10]; only the pushax call at L[6]
 * is left out of the two label checks.
 */
184 !CS_RangeHasLabel (S, I+1, 5) &&
185 !CS_RangeHasLabel (S, I+7, 4) &&
186 /* Check the label last because this is quite costly */
/* The lda argument must start with "<(", the ldx argument with ">", and
 * both must be of equal length with identical text after the first
 * character.
 */
187 (Len = strlen (L[0]->Arg)) > 3 &&
188 L[0]->Arg[0] == '<' &&
189 L[0]->Arg[1] == '(' &&
190 strlen (L[1]->Arg) == Len &&
191 L[1]->Arg[0] == '>' &&
192 memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) {
197 /* We will create all the new stuff behind the current one so
198 * we keep the line references.
 * All insertions go to index I+11 and above, behind the eleven matched
 * entries, which are deleted afterwards in one call.
 * Label is built from the argument of L[0] without the leading "<(" and
 * without its last character; the buffer has room for one more byte.
 * NOTE(review): the new ldy takes its operand from L[3] but its line info
 * from L[0], while every other copy here keeps the line info of the entry
 * it was copied from - confirm that this is intended.
200 X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI);
201 CS_InsertEntry (S, X, I+11);
203 X = NewCodeEntry (OP65_LDX, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
204 CS_InsertEntry (S, X, I+12);
206 X = NewCodeEntry (OP65_LDA, L[8]->AM, L[8]->Arg, 0, L[8]->LI);
207 CS_InsertEntry (S, X, I+13);
209 Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3);
211 X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[10]->LI);
212 CS_InsertEntry (S, X, I+14);
215 /* Remove the old code */
216 CS_DelEntries (S, I, 11);
218 /* Remember, we had changes */
228 /* Return the number of changes made */
234 unsigned OptPtrStore2 (CodeSeg* S)
235 /* Search for the sequence:
 *
 *      lda     #<(addr)
 *      ldx     #>(addr)
 *      ldy     yyy
 *      clc
 *      adc     (zp),y
 *      bcc     L               (or jcc)
 *      inx
 * L:   jsr     pushax
 *      ldx     xxx
 *      lda     zzz
 *      ldy     #$00
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      ldy     yyy
 *      ldx     xxx
 *      lda     (zp),y
 *      tay
 *      lda     zzz
 *      sta     addr,y
 *
260 unsigned Changes = 0;
262 /* Walk over the entries */
264 while (I < CS_GetEntryCount (S)) {
270 L[0] = CS_GetEntry (S, I);
272 /* Check for the sequence */
273 if (L[0]->OPC == OP65_LDA &&
274 L[0]->AM == AM65_IMM &&
275 CS_GetEntries (S, L+1, I+1, 11) &&
276 L[1]->OPC == OP65_LDX &&
277 L[1]->AM == AM65_IMM &&
278 L[2]->OPC == OP65_LDY &&
279 L[3]->OPC == OP65_CLC &&
280 L[4]->OPC == OP65_ADC &&
281 L[4]->AM == AM65_ZP_INDY &&
282 (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) &&
/* The branch has to target the pushax call, so it skips only the inx */
284 L[5]->JumpTo->Owner == L[7] &&
285 L[6]->OPC == OP65_INX &&
286 CE_IsCallTo (L[7], "pushax") &&
287 L[8]->OPC == OP65_LDX &&
288 L[9]->OPC == OP65_LDA &&
289 L[10]->OPC == OP65_LDY &&
290 CE_IsKnownImm (L[10], 0) &&
291 CE_IsCallTo (L[11], "staspidx") &&
/* No labels on L[1]..L[6] and L[8]..L[11]; only the pushax call at L[7]
 * is left out of the two label checks.
 */
292 !CS_RangeHasLabel (S, I+1, 6) &&
293 !CS_RangeHasLabel (S, I+8, 4) &&
294 /* Check the label last because this is quite costly */
/* The lda argument must start with "<(", the ldx argument with ">", and
 * both must be of equal length with identical text after the first
 * character.
 */
295 (Len = strlen (L[0]->Arg)) > 3 &&
296 L[0]->Arg[0] == '<' &&
297 L[0]->Arg[1] == '(' &&
298 strlen (L[1]->Arg) == Len &&
299 L[1]->Arg[0] == '>' &&
300 memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) {
305 /* We will create all the new stuff behind the current one so
306 * we keep the line references.
 * All insertions go to index I+12 and above, behind the twelve matched
 * entries, which are deleted afterwards in one call.
 * The operand of the adc at L[4] is reused for a load, and the loaded
 * value is moved to Y to serve as index for the final store.
 * Label is built from the argument of L[0] without the leading "<(" and
 * without its last character; the buffer has room for one more byte.
308 X = NewCodeEntry (OP65_LDY, L[2]->AM, L[2]->Arg, 0, L[2]->LI);
309 CS_InsertEntry (S, X, I+12);
311 X = NewCodeEntry (OP65_LDX, L[8]->AM, L[8]->Arg, 0, L[8]->LI);
312 CS_InsertEntry (S, X, I+13);
314 X = NewCodeEntry (OP65_LDA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
315 CS_InsertEntry (S, X, I+14);
317 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[4]->LI);
318 CS_InsertEntry (S, X, I+15);
320 X = NewCodeEntry (OP65_LDA, L[9]->AM, L[9]->Arg, 0, L[9]->LI);
321 CS_InsertEntry (S, X, I+16);
323 Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3);
325 X = NewCodeEntry (OP65_STA, AM65_ABSY, Label, 0, L[11]->LI);
326 CS_InsertEntry (S, X, I+17);
329 /* Remove the old code */
330 CS_DelEntries (S, I, 12);
332 /* Remember, we had changes */
342 /* Return the number of changes made */
348 unsigned OptPtrStore3 (CodeSeg* S)
349 /* Search for the sequence:
 *
 *      lda     #<(addr)
 *      ldx     #>(addr)
 *      ldy     yyy
 *      clc
 *      adc     (zp),y
 *      bcc     L               (or jcc)
 *      inx
 * L:   jsr     pushax
 *      ldy     #nn
 *      ldx     xxx
 *      lda     (zp2),y
 *      ldy     #$00
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      ldy     yyy
 *      lda     (zp),y
 *      tax
 *      ldy     #nn-2
 *      lda     (zp2),y
 *      sta     addr,x
 *      ldx     xxx
 *
376 unsigned Changes = 0;
378 /* Walk over the entries */
380 while (I < CS_GetEntryCount (S)) {
386 L[0] = CS_GetEntry (S, I);
388 /* Check for the sequence */
389 if (L[0]->OPC == OP65_LDA &&
390 L[0]->AM == AM65_IMM &&
391 CS_GetEntries (S, L+1, I+1, 12) &&
392 L[1]->OPC == OP65_LDX &&
393 L[1]->AM == AM65_IMM &&
394 L[2]->OPC == OP65_LDY &&
395 L[3]->OPC == OP65_CLC &&
396 L[4]->OPC == OP65_ADC &&
397 L[4]->AM == AM65_ZP_INDY &&
398 (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) &&
/* The branch has to target the pushax call, so it skips only the inx */
400 L[5]->JumpTo->Owner == L[7] &&
401 L[6]->OPC == OP65_INX &&
402 CE_IsCallTo (L[7], "pushax") &&
403 L[8]->OPC == OP65_LDY &&
404 CE_IsConstImm (L[8]) &&
406 L[9]->OPC == OP65_LDX &&
407 L[10]->OPC == OP65_LDA &&
408 L[10]->AM == AM65_ZP_INDY &&
409 L[11]->OPC == OP65_LDY &&
410 CE_IsKnownImm (L[11], 0) &&
411 CE_IsCallTo (L[12], "staspidx") &&
/* No labels on L[1]..L[6] and L[8]..L[12]; only the pushax call at L[7]
 * is left out of the two label checks.
 */
412 !CS_RangeHasLabel (S, I+1, 6) &&
413 !CS_RangeHasLabel (S, I+8, 5) &&
414 /* Check the label last because this is quite costly */
/* The lda argument must start with "<(", the ldx argument with ">", and
 * both must be of equal length with identical text after the first
 * character.
 */
415 (Len = strlen (L[0]->Arg)) > 3 &&
416 L[0]->Arg[0] == '<' &&
417 L[0]->Arg[1] == '(' &&
418 strlen (L[1]->Arg) == Len &&
419 L[1]->Arg[0] == '>' &&
420 memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) {
426 /* We will create all the new stuff behind the current one so
427 * we keep the line references.
 * All insertions go to index I+13 and above, behind the thirteen matched
 * entries, which are deleted afterwards in one call.
 * The value loaded through the adc operand is moved to X and used as
 * index for the final store; the original ldx from L[9] is re-emitted
 * after the store.
 * NOTE(review): the immediate of L[8] is lowered by two for the new ldy,
 * presumably because the two bytes pushed by pushax are no longer on the
 * stack. In the visible checks the base of L[10] is not compared against
 * "sp" (OptPtrStore5 does this) and no lower bound for L[8]->Num is
 * shown - confirm against the complete source.
429 X = NewCodeEntry (OP65_LDY, L[2]->AM, L[2]->Arg, 0, L[2]->LI);
430 CS_InsertEntry (S, X, I+13);
432 X = NewCodeEntry (OP65_LDA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
433 CS_InsertEntry (S, X, I+14);
435 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[4]->LI);
436 CS_InsertEntry (S, X, I+15);
438 Arg = MakeHexArg (L[8]->Num - 2);
439 X = NewCodeEntry (OP65_LDY, L[8]->AM, Arg, 0, L[8]->LI);
440 CS_InsertEntry (S, X, I+16);
442 X = NewCodeEntry (OP65_LDA, L[10]->AM, L[10]->Arg, 0, L[10]->LI);
443 CS_InsertEntry (S, X, I+17);
445 Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3);
447 X = NewCodeEntry (OP65_STA, AM65_ABSX, Label, 0, L[12]->LI);
448 CS_InsertEntry (S, X, I+18);
451 X = NewCodeEntry (OP65_LDX, L[9]->AM, L[9]->Arg, 0, L[9]->LI);
452 CS_InsertEntry (S, X, I+19);
454 /* Remove the old code */
455 CS_DelEntries (S, I, 13);
457 /* Remember, we had changes */
467 /* Return the number of changes made */
473 unsigned OptPtrStore4 (CodeSeg* S)
474 /* Search for the sequence:
 *
 *      clc
 *      adc     xxx             (absolute, zero page or immediate)
 *      bcc     L               (or jcc)
 *      inx
 * L:   jsr     pushax
 *      ldx     zzz
 *      lda     yyy
 *      ldy     #$00
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      sta     ptr1
 *      stx     ptr1+1
 *      ldy     xxx
 *      ldx     zzz
 *      lda     yyy
 *      sta     (ptr1),y
 *
495 * In case a/x is loaded from the register bank before the clc, we can even
496 * use the register bank instead of ptr1.
499 unsigned Changes = 0;
501 /* Walk over the entries */
503 while (I < CS_GetEntryCount (S)) {
508 L[0] = CS_GetEntry (S, I);
510 /* Check for the sequence */
511 if (L[0]->OPC == OP65_CLC &&
512 CS_GetEntries (S, L+1, I+1, 8) &&
513 L[1]->OPC == OP65_ADC &&
514 (L[1]->AM == AM65_ABS ||
515 L[1]->AM == AM65_ZP ||
516 L[1]->AM == AM65_IMM) &&
517 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
/* The branch has to target the pushax call, so it skips only the inx */
519 L[2]->JumpTo->Owner == L[4] &&
520 L[3]->OPC == OP65_INX &&
521 CE_IsCallTo (L[4], "pushax") &&
522 L[5]->OPC == OP65_LDX &&
523 L[6]->OPC == OP65_LDA &&
524 L[7]->OPC == OP65_LDY &&
525 CE_IsKnownImm (L[7], 0) &&
526 CE_IsCallTo (L[8], "staspidx") &&
/* No labels on L[1]..L[3] and L[5]..L[8]; only the pushax call at L[4]
 * is left out of the two label checks.
 */
527 !CS_RangeHasLabel (S, I+1, 3) &&
528 !CS_RangeHasLabel (S, I+5, 4)) {
533 /* Track the insertion point */
536 /* If the sequence is preceded by a load of a ZP value, we can
537 * use this ZP value as a pointer.
 * ZPLoadAX is asked about the two entries in front of the clc. When
 * there are fewer than two entries in front (I < 2) or they do not
 * match, a/x are stored into ptr1 and ptr1+1 instead.
 * NOTE(review): the declaration of IP, the fallback assignment of ZPLoc
 * and the end of the conditional part are not visible in this excerpt;
 * presumably only the sta/stx pair is conditional.
539 if (I < 2 || (ZPLoc = ZPLoadAX (S, I-2)) == 0) {
544 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[8]->LI);
545 CS_InsertEntry (S, X, IP++);
547 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[8]->LI);
548 CS_InsertEntry (S, X, IP++);
552 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
553 CS_InsertEntry (S, X, IP++);
555 X = NewCodeEntry (OP65_LDX, L[5]->AM, L[5]->Arg, 0, L[5]->LI);
556 CS_InsertEntry (S, X, IP++);
558 X = NewCodeEntry (OP65_LDA, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
559 CS_InsertEntry (S, X, IP++);
561 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[8]->LI);
562 CS_InsertEntry (S, X, IP++);
564 /* Remove the old code */
565 CS_DelEntries (S, I, 9);
567 /* Remember, we had changes */
577 /* Return the number of changes made */
583 unsigned OptPtrStore5 (CodeSeg* S)
584 /* Search for the sequence:
 *
 *      clc
 *      adc     xxx             (absolute, zero page or immediate)
 *      bcc     L               (or jcc)
 *      inx
 * L:   jsr     pushax
 *      ldy     #nn
 *      ldx     zzz
 *      lda     (sp),y
 *      ldy     #$00
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      sta     ptr1
 *      stx     ptr1+1
 *      ldy     #nn-2
 *      ldx     zzz
 *      lda     (sp),y
 *      ldy     xxx
 *      sta     (ptr1),y
 *
607 * In case a/x is loaded from the register bank before the clc, we can even
608 * use the register bank instead of ptr1.
611 unsigned Changes = 0;
613 /* Walk over the entries */
615 while (I < CS_GetEntryCount (S)) {
620 L[0] = CS_GetEntry (S, I);
622 /* Check for the sequence */
623 if (L[0]->OPC == OP65_CLC &&
624 CS_GetEntries (S, L+1, I+1, 9) &&
625 L[1]->OPC == OP65_ADC &&
626 (L[1]->AM == AM65_ABS ||
627 L[1]->AM == AM65_ZP ||
628 L[1]->AM == AM65_IMM) &&
629 (L[2]->OPC == OP65_BCC || L[2]->OPC == OP65_JCC) &&
/* The branch has to target the pushax call, so it skips only the inx */
631 L[2]->JumpTo->Owner == L[4] &&
632 L[3]->OPC == OP65_INX &&
633 CE_IsCallTo (L[4], "pushax") &&
634 L[5]->OPC == OP65_LDY &&
635 CE_IsConstImm (L[5]) &&
636 L[6]->OPC == OP65_LDX &&
637 L[7]->OPC == OP65_LDA &&
638 L[7]->AM == AM65_ZP_INDY &&
639 strcmp (L[7]->Arg, "sp") == 0 &&
640 L[8]->OPC == OP65_LDY &&
/* The replacement below drops this ldy and indexes the store with xxx
 * alone, so the value it loads must be known to be zero. This is the
 * same check OptPtrStore4 performs on the corresponding instruction.
 */
641 CE_IsKnownImm (L[8], 0) &&
644 CE_IsCallTo (L[9], "staspidx") &&
/* No labels on L[1]..L[3] and L[5]..L[9]; only the pushax call at L[4]
 * is left out of the two label checks.
 */
645 !CS_RangeHasLabel (S, I+1, 3) &&
646 !CS_RangeHasLabel (S, I+5, 5)) {
652 /* Track the insertion point */
653 unsigned IP = I + 10;
655 /* If the sequence is preceded by a load of a ZP value, we can
656 * use this ZP value as a pointer.
 * ZPLoadAX is asked about the two entries in front of the clc. When
 * there are fewer than two entries in front (I < 2) or they do not
 * match, a/x are stored into ptr1 and ptr1+1 instead.
 * The immediate of L[5] is lowered by two for the new ldy, presumably
 * because the two bytes pushed by pushax are no longer on the stack.
 * NOTE(review): the fallback assignment of ZPLoc and the end of the
 * conditional part are not visible in this excerpt; presumably only the
 * sta/stx pair is conditional.
658 if (I < 2 || (ZPLoc = ZPLoadAX (S, I-2)) == 0) {
663 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[9]->LI);
664 CS_InsertEntry (S, X, IP++);
666 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[9]->LI);
667 CS_InsertEntry (S, X, IP++);
671 Arg = MakeHexArg (L[5]->Num - 2);
672 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[5]->LI);
673 CS_InsertEntry (S, X, IP++);
675 X = NewCodeEntry (OP65_LDX, L[6]->AM, L[6]->Arg, 0, L[6]->LI);
676 CS_InsertEntry (S, X, IP++);
678 X = NewCodeEntry (OP65_LDA, L[7]->AM, L[7]->Arg, 0, L[7]->LI);
679 CS_InsertEntry (S, X, IP++);
681 X = NewCodeEntry (OP65_LDY, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
682 CS_InsertEntry (S, X, IP++);
684 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[9]->LI);
685 CS_InsertEntry (S, X, IP++);
687 /* Remove the old code */
688 CS_DelEntries (S, I, 10);
690 /* Remember, we had changes */
700 /* Return the number of changes made */
706 unsigned OptPtrStore6 (CodeSeg* S)
707 /* Search for the sequence:
 *
 *      jsr     pushax
 *      ldy     #nn
 *      jsr     ldauidx
 *      subop                   (one or two insns, see OptPtrStore1Sub)
 *      ldy     #mm
 *      jsr     staspidx
 *
 * and replace it by:
 *
 *      sta     ptr1
 *      stx     ptr1+1
 *      ldy     #nn
 *      ldx     #$00
 *      lda     (ptr1),y
 *      subop
 *      ldy     #mm
 *      sta     (ptr1),y
 *
727 * In case a/x is loaded from the register bank before the pushax, we can even
728 * use the register bank instead of ptr1.
732 unsigned Changes = 0;
734 /* Walk over the entries */
736 while (I < CS_GetEntryCount (S)) {
742 L[0] = CS_GetEntry (S, I);
744 /* Check for the sequence */
745 if (CE_IsCallTo (L[0], "pushax") &&
746 CS_GetEntries (S, L+1, I+1, 3) &&
747 L[1]->OPC == OP65_LDY &&
748 CE_IsConstImm (L[1]) &&
749 !CE_HasLabel (L[1]) &&
750 CE_IsCallTo (L[2], "ldauidx") &&
751 !CE_HasLabel (L[2]) &&
/* K is the number of entries taken by the sub operation starting at
 * L[3]; the remaining two entries are fetched behind it.
 */
752 (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 &&
753 CS_GetEntries (S, L+3+K, I+3+K, 2) &&
754 L[3+K]->OPC == OP65_LDY &&
755 CE_IsConstImm (L[3+K]) &&
756 !CE_HasLabel (L[3+K]) &&
757 CE_IsCallTo (L[4+K], "staspidx") &&
758 !CE_HasLabel (L[4+K])) {
761 const char* RegBank = 0;
762 const char* ZPLoc = "ptr1";
766 /* Get the preceding two instructions and check them. We check
 * for an lda/ldx pair with zero page addressing where the lda argument
 * starts with "regbank+" and the ldx argument is the lda argument with
 * "+1" appended.
 * NOTE(review): the guard that makes sure two preceding entries exist
 * before I-2 is evaluated is not visible in this excerpt.
773 P[0] = CS_GetEntry (S, I-2);
774 P[1] = CS_GetEntry (S, I-1);
775 if (P[0]->OPC == OP65_LDA &&
776 P[0]->AM == AM65_ZP &&
777 P[1]->OPC == OP65_LDX &&
778 P[1]->AM == AM65_ZP &&
779 !CE_HasLabel (P[1]) &&
780 strncmp (P[0]->Arg, "regbank+", 8) == 0) {
782 unsigned Len = strlen (P[0]->Arg);
784 if (strncmp (P[0]->Arg, P[1]->Arg, Len) == 0 &&
785 P[1]->Arg[Len+0] == '+' &&
786 P[1]->Arg[Len+1] == '1' &&
787 P[1]->Arg[Len+2] == '\0') {
789 /* Ok, found. Use the name of the register bank */
790 RegBank = ZPLoc = P[0]->Arg;
795 /* Insert the load via the zp pointer */
/* Both entries go directly behind the ldauidx call at I+2. Afterwards
 * the sub operation starts at I+5 and the staspidx call is at I+6+K.
 */
796 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI);
797 CS_InsertEntry (S, X, I+3);
798 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, ZPLoc, 0, L[2]->LI);
799 CS_InsertEntry (S, X, I+4);
801 /* Insert the store through the zp pointer */
/* Inserted in front of the staspidx call, which moves to I+7+K */
802 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, ZPLoc, 0, L[3]->LI);
803 CS_InsertEntry (S, X, I+6+K);
805 /* Delete the old code */
/* The higher index is deleted first so that I+2 is still valid */
806 CS_DelEntry (S, I+7+K); /* jsr staspidx */
807 CS_DelEntry (S, I+2); /* jsr ldauidx */
809 /* Create and insert the stores into the zp pointer if needed */
/* NOTE(review): the condition guarding these stores is not visible in this
 * excerpt; presumably they are only emitted when RegBank is still 0.
 */
811 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
812 CS_InsertEntry (S, X, I+1);
813 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
814 CS_InsertEntry (S, X, I+2);
817 /* Delete more old code. Do it here to keep a label attached to
820 CS_DelEntry (S, I); /* jsr pushax */
822 /* Remember, we had changes */
832 /* Return the number of changes made */