1 /*****************************************************************************/
5 /* Optimizer subroutines */
9 /* (C) 2001-2002 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
66 /*****************************************************************************/
68 /*****************************************************************************/
83 /*****************************************************************************/
85 /*****************************************************************************/
89 static unsigned OptShift1 (CodeSeg* S)
90 /* A call to the shlaxN routine may get replaced by one or more asl insns
91 * if the value of X is not used later.
96 /* Walk over the entries */
98 while (I < CS_GetEntryCount (S)) {
101 CodeEntry* E = CS_GetEntry (S, I);
103 /* Check for the sequence */
104 if (E->OPC == OP65_JSR &&
105 (strncmp (E->Arg, "shlax", 5) == 0 ||
106 strncmp (E->Arg, "aslax", 5) == 0) &&
107 strlen (E->Arg) == 6 &&
108 IsDigit (E->Arg[5]) &&
109 !RegXUsed (S, I+1)) {
111 /* Insert shift insns */
112 unsigned Count = E->Arg[5] - '0';
114 CodeEntry* X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
115 CS_InsertEntry (S, X, I+1);
118 /* Delete the call to shlax */
121 /* Remember, we had changes */
131 /* Return the number of changes made */
137 static unsigned OptShift2 (CodeSeg* S)
138 /* A call to the shraxN routine may get replaced by one or more lsr insns
139 * if the value of X is not used later.
142 unsigned Changes = 0;
144 /* Walk over the entries */
146 while (I < CS_GetEntryCount (S)) {
149 CodeEntry* E = CS_GetEntry (S, I);
151 /* Check for the sequence */
152 if (E->OPC == OP65_JSR &&
153 strncmp (E->Arg, "shrax", 5) == 0 &&
154 strlen (E->Arg) == 6 &&
155 IsDigit (E->Arg[5]) &&
156 !RegXUsed (S, I+1)) {
158 /* Insert shift insns */
159 unsigned Count = E->Arg[5] - '0';
161 CodeEntry* X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI);
162 CS_InsertEntry (S, X, I+1);
165 /* Delete the call to shlax */
168 /* Remember, we had changes */
178 /* Return the number of changes made */
184 static unsigned GetShiftType (const char* Sub)
185 /* Helper function for OptShift3 */
188 if (strcmp (Sub+1, "slax1") == 0) {
190 } else if (strcmp (Sub+1, "srax1") == 0) {
193 } else if (*Sub == 's') {
194 if (strcmp (Sub+1, "hlax1") == 0) {
196 } else if (strcmp (Sub+1, "hrax1") == 0) {
205 static unsigned OptShift3 (CodeSeg* S)
206 /* Search for the sequence
210 * jsr aslax1/asrax1/shlax1/shrax1
223 * or similar, provided that a/x is not used later
226 unsigned Changes = 0;
228 /* Walk over the entries */
230 while (I < CS_GetEntryCount (S)) {
236 L[0] = CS_GetEntry (S, I);
238 /* Check for the sequence */
239 if (L[0]->OPC == OP65_LDA &&
240 (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) &&
241 CS_GetEntries (S, L+1, I+1, 4) &&
242 !CS_RangeHasLabel (S, I+1, 4) &&
243 L[1]->OPC == OP65_LDX &&
244 (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) &&
245 L[2]->OPC == OP65_JSR &&
246 (ShiftType = GetShiftType (L[2]->Arg)) != SHIFT_NONE&&
247 L[3]->OPC == OP65_STA &&
248 (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) &&
249 L[4]->OPC == OP65_STX &&
250 (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) &&
251 !RegAXUsed (S, I+5)) {
255 /* Handle the four shift types differently */
259 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
260 CS_InsertEntry (S, X, I+5);
261 X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI);
262 CS_InsertEntry (S, X, I+6);
263 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
264 CS_InsertEntry (S, X, I+7);
265 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
266 CS_InsertEntry (S, X, I+8);
267 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
268 CS_InsertEntry (S, X, I+9);
269 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
270 CS_InsertEntry (S, X, I+10);
271 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
272 CS_InsertEntry (S, X, I+11);
273 CS_DelEntries (S, I, 5);
277 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
278 CS_InsertEntry (S, X, I+5);
279 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI);
280 CS_InsertEntry (S, X, I+6);
281 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
282 CS_InsertEntry (S, X, I+7);
283 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
284 CS_InsertEntry (S, X, I+8);
285 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
286 CS_InsertEntry (S, X, I+9);
287 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
288 CS_InsertEntry (S, X, I+10);
289 CS_DelEntries (S, I, 5);
294 /* These two are identical */
295 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI);
296 CS_InsertEntry (S, X, I+1);
297 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
298 CS_InsertEntry (S, X, I+2);
299 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
300 CS_InsertEntry (S, X, I+3);
301 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI);
302 CS_InsertEntry (S, X, I+4);
303 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
304 CS_InsertEntry (S, X, I+5);
305 CS_DelEntries (S, I+6, 4);
310 /* Remember, we had changes */
320 /* Return the number of changes made */
326 /*****************************************************************************/
327 /* Optimize stores through pointers */
328 /*****************************************************************************/
332 static unsigned OptPtrStore1Sub (CodeSeg* S, unsigned I, CodeEntry** const L)
333 /* Check if this is one of the allowed suboperation for OptPtrStore1 */
335 /* Check for a label attached to the entry */
336 if (CE_HasLabel (L[0])) {
340 /* Check for single insn sub ops */
341 if (L[0]->OPC == OP65_AND ||
342 L[0]->OPC == OP65_EOR ||
343 L[0]->OPC == OP65_ORA ||
344 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shlax", 5) == 0) ||
345 (L[0]->OPC == OP65_JSR && strncmp (L[0]->Arg, "shrax", 5) == 0)) {
350 } else if (L[0]->OPC == OP65_CLC &&
351 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
352 L[1]->OPC == OP65_ADC &&
353 !CE_HasLabel (L[1])) {
355 } else if (L[0]->OPC == OP65_SEC &&
356 (L[1] = CS_GetNextEntry (S, I)) != 0 &&
357 L[1]->OPC == OP65_SBC &&
358 !CE_HasLabel (L[1])) {
370 static unsigned OptPtrStore1 (CodeSeg* S)
371 /* Search for the sequence:
392 unsigned Changes = 0;
394 /* Walk over the entries */
396 while (I < CS_GetEntryCount (S)) {
402 L[0] = CS_GetEntry (S, I);
404 /* Check for the sequence */
405 if (CE_IsCallTo (L[0], "pushax") &&
406 CS_GetEntries (S, L+1, I+1, 3) &&
407 L[1]->OPC == OP65_LDY &&
408 CE_KnownImm (L[1]) &&
409 !CE_HasLabel (L[1]) &&
410 CE_IsCallTo (L[2], "ldauidx") &&
411 !CE_HasLabel (L[2]) &&
412 (K = OptPtrStore1Sub (S, I+3, L+3)) > 0 &&
413 CS_GetEntries (S, L+3+K, I+3+K, 2) &&
414 L[3+K]->OPC == OP65_LDY &&
415 CE_KnownImm (L[3+K]) &&
416 !CE_HasLabel (L[3+K]) &&
417 CE_IsCallTo (L[4+K], "staspidx") &&
418 !CE_HasLabel (L[4+K])) {
422 /* Create and insert the stores */
423 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
424 CS_InsertEntry (S, X, I+1);
426 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
427 CS_InsertEntry (S, X, I+2);
429 /* Delete the call to pushax */
432 /* Delete the call to ldauidx */
433 CS_DelEntry (S, I+3);
435 /* Insert the load from ptr1 */
436 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI);
437 CS_InsertEntry (S, X, I+3);
438 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[2]->LI);
439 CS_InsertEntry (S, X, I+4);
441 /* Insert the store through ptr1 */
442 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI);
443 CS_InsertEntry (S, X, I+6+K);
445 /* Delete the call to staspidx */
446 CS_DelEntry (S, I+7+K);
448 /* Remember, we had changes */
458 /* Return the number of changes made */
464 static unsigned OptPtrStore2 (CodeSeg* S)
465 /* Search for the sequence:
481 unsigned Changes = 0;
483 /* Walk over the entries */
485 while (I < CS_GetEntryCount (S)) {
490 L[0] = CS_GetEntry (S, I);
492 /* Check for the sequence */
493 if (CE_IsCallTo (L[0], "pushax") &&
494 CS_GetEntries (S, L+1, I+1, 3) &&
495 L[1]->OPC == OP65_LDA &&
496 !CE_HasLabel (L[1]) &&
497 L[2]->OPC == OP65_LDY &&
498 !CE_HasLabel (L[2]) &&
499 CE_IsCallTo (L[3], "staspidx") &&
500 !CE_HasLabel (L[3])) {
504 /* Create and insert the stores */
505 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
506 CS_InsertEntry (S, X, I+1);
508 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
509 CS_InsertEntry (S, X, I+2);
511 /* Delete the call to pushax */
514 /* Insert the store through ptr1 */
515 X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI);
516 CS_InsertEntry (S, X, I+4);
518 /* Delete the call to staspidx */
519 CS_DelEntry (S, I+5);
521 /* Remember, we had changes */
531 /* Return the number of changes made */
537 /*****************************************************************************/
538 /* Optimize loads through pointers */
539 /*****************************************************************************/
543 static unsigned OptPtrLoad1 (CodeSeg* S)
544 /* Search for the sequence:
548 * lda (sp),y # May be any destination
563 unsigned Changes = 0;
565 /* Walk over the entries */
567 while (I < CS_GetEntryCount (S)) {
572 L[0] = CS_GetEntry (S, I);
574 /* Check for the sequence */
575 if (L[0]->OPC == OP65_TAX &&
576 CS_GetEntries (S, L+1, I+1, 4) &&
577 L[1]->OPC == OP65_DEY &&
578 !CE_HasLabel (L[1]) &&
579 L[2]->OPC == OP65_LDA &&
580 !CE_HasLabel (L[2]) &&
581 L[3]->OPC == OP65_LDY &&
582 !CE_HasLabel (L[3]) &&
583 CE_IsCallTo (L[4], "ldauidx") &&
584 !CE_HasLabel (L[4])) {
588 /* Store the high byte and remove the TAX instead */
589 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[0]->LI);
590 CS_InsertEntry (S, X, I+1);
593 /* Store the low byte */
594 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[2]->LI);
595 CS_InsertEntry (S, X, I+3);
597 /* Delete the call to ldauidx */
598 CS_DelEntry (S, I+5);
600 /* Load high and low byte */
601 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[3]->LI);
602 CS_InsertEntry (S, X, I+5);
603 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[3]->LI);
604 CS_InsertEntry (S, X, I+6);
606 /* Remember, we had changes */
616 /* Return the number of changes made */
622 static unsigned OptPtrLoad2 (CodeSeg* S)
623 /* Search for the sequence:
648 unsigned Changes = 0;
650 /* Walk over the entries */
652 while (I < CS_GetEntryCount (S)) {
657 L[0] = CS_GetEntry (S, I);
659 /* Check for the sequence */
660 if (L[0]->OPC == OP65_CLC &&
661 CS_GetEntries (S, L+1, I+1, 8) &&
662 L[1]->OPC == OP65_ADC &&
663 !CE_HasLabel (L[1]) &&
664 L[2]->OPC == OP65_TAY &&
665 !CE_HasLabel (L[2]) &&
666 L[3]->OPC == OP65_TXA &&
667 !CE_HasLabel (L[3]) &&
668 L[4]->OPC == OP65_ADC &&
669 !CE_HasLabel (L[4]) &&
670 L[5]->OPC == OP65_TAX &&
671 !CE_HasLabel (L[5]) &&
672 L[6]->OPC == OP65_TYA &&
673 !CE_HasLabel (L[6]) &&
674 L[7]->OPC == OP65_LDY &&
675 !CE_HasLabel (L[7]) &&
676 CE_IsCallTo (L[8], "ldauidx") &&
677 !CE_HasLabel (L[8])) {
682 /* Store the low byte and remove the TAY instead */
683 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[1]->LI);
684 CS_InsertEntry (S, X, I+2);
685 CS_DelEntry (S, I+3);
687 /* Store the high byte */
688 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI);
689 CS_InsertEntry (S, X, I+5);
691 /* If the instruction before the adc is a ldx, replace the
692 * txa by and lda with the same location of the ldx.
694 if ((P = CS_GetPrevEntry (S, I)) != 0 &&
695 P->OPC == OP65_LDX &&
698 X = NewCodeEntry (OP65_LDA, P->AM, P->Arg, 0, P->LI);
699 CS_InsertEntry (S, X, I+4);
700 CS_DelEntry (S, I+3);
703 /* Delete more transfer insns */
704 CS_DelEntry (S, I+7);
705 CS_DelEntry (S, I+6);
707 /* Delete the call to ldauidx */
708 CS_DelEntry (S, I+7);
710 /* Load high and low byte */
711 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[7]->LI);
712 CS_InsertEntry (S, X, I+7);
713 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[7]->LI);
714 CS_InsertEntry (S, X, I+8);
716 /* Remember, we had changes */
726 /* Return the number of changes made */
732 static unsigned OptPtrLoad3 (CodeSeg* S)
733 /* Search for the sequence:
758 unsigned Changes = 0;
760 /* Walk over the entries */
762 while (I < CS_GetEntryCount (S)) {
767 L[0] = CS_GetEntry (S, I);
769 /* Check for the sequence */
770 if (L[0]->OPC == OP65_ADC &&
771 CS_GetEntries (S, L+1, I+1, 8) &&
772 L[1]->OPC == OP65_PHA &&
773 !CE_HasLabel (L[1]) &&
774 L[2]->OPC == OP65_TXA &&
775 !CE_HasLabel (L[2]) &&
776 L[3]->OPC == OP65_INY &&
777 !CE_HasLabel (L[3]) &&
778 L[4]->OPC == OP65_ADC &&
779 !CE_HasLabel (L[4]) &&
780 L[5]->OPC == OP65_TAX &&
781 !CE_HasLabel (L[5]) &&
782 L[6]->OPC == OP65_PLA &&
783 !CE_HasLabel (L[6]) &&
784 L[7]->OPC == OP65_LDY &&
785 !CE_HasLabel (L[7]) &&
786 CE_IsCallTo (L[8], "ldauidx") &&
787 !CE_HasLabel (L[8])) {
791 /* Store the low byte and remove the PHA instead */
792 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
793 CS_InsertEntry (S, X, I+1);
794 CS_DelEntry (S, I+2);
796 /* Store the high byte */
797 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1+1", 0, L[4]->LI);
798 CS_InsertEntry (S, X, I+5);
800 /* Delete more transfer and PLA insns */
801 CS_DelEntry (S, I+7);
802 CS_DelEntry (S, I+6);
804 /* Delete the call to ldauidx */
805 CS_DelEntry (S, I+7);
807 /* Load high and low byte */
808 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[6]->LI);
809 CS_InsertEntry (S, X, I+7);
810 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[6]->LI);
811 CS_InsertEntry (S, X, I+8);
813 /* Remember, we had changes */
823 /* Return the number of changes made */
829 static unsigned OptPtrLoad4 (CodeSeg* S)
830 /* Search for the sequence:
848 unsigned Changes = 0;
850 /* Walk over the entries */
852 while (I < CS_GetEntryCount (S)) {
858 L[0] = CS_GetEntry (S, I);
860 /* Check for the sequence */
861 if (L[0]->OPC == OP65_LDA &&
862 L[0]->AM == AM65_IMM &&
863 CS_GetEntries (S, L+1, I+1, 7) &&
864 L[1]->OPC == OP65_LDX &&
865 L[1]->AM == AM65_IMM &&
866 !CE_HasLabel (L[1]) &&
867 L[2]->OPC == OP65_CLC &&
868 !CE_HasLabel (L[2]) &&
869 L[3]->OPC == OP65_ADC &&
870 (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) &&
871 !CE_HasLabel (L[3]) &&
872 (L[4]->OPC == OP65_BCC || L[4]->OPC == OP65_JCC) &&
874 L[4]->JumpTo->Owner == L[6] &&
875 !CE_HasLabel (L[4]) &&
876 L[5]->OPC == OP65_INX &&
877 !CE_HasLabel (L[5]) &&
878 L[6]->OPC == OP65_LDY &&
879 CE_KnownImm (L[6]) &&
881 CE_IsCallTo (L[7], "ldauidx") &&
882 !CE_HasLabel (L[7]) &&
883 /* Check the label last because this is quite costly */
884 (Len = strlen (L[0]->Arg)) > 3 &&
885 L[0]->Arg[0] == '<' &&
886 L[0]->Arg[1] == '(' &&
887 strlen (L[1]->Arg) == Len &&
888 L[1]->Arg[0] == '>' &&
889 memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) {
894 /* We will create all the new stuff behind the current one so
895 * we keep the line references.
897 X = NewCodeEntry (OP65_LDY, L[3]->AM, L[3]->Arg, 0, L[0]->LI);
898 CS_InsertEntry (S, X, I+8);
900 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI);
901 CS_InsertEntry (S, X, I+9);
903 Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3);
905 X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI);
906 CS_InsertEntry (S, X, I+10);
909 /* Remove the old code */
910 CS_DelEntries (S, I, 8);
912 /* Remember, we had changes */
922 /* Return the number of changes made */
928 static unsigned OptPtrLoad5 (CodeSeg* S)
929 /* Search for the sequence:
950 unsigned Changes = 0;
952 /* Walk over the entries */
954 while (I < CS_GetEntryCount (S)) {
960 L[0] = CS_GetEntry (S, I);
962 /* Check for the sequence */
963 if (L[0]->OPC == OP65_LDA &&
964 L[0]->AM == AM65_IMM &&
965 CS_GetEntries (S, L+1, I+1, 8) &&
966 L[1]->OPC == OP65_LDX &&
967 L[1]->AM == AM65_IMM &&
968 !CE_HasLabel (L[1]) &&
969 L[2]->OPC == OP65_LDY &&
970 CE_KnownImm (L[2]) &&
971 !CE_HasLabel (L[2]) &&
972 L[3]->OPC == OP65_CLC &&
973 !CE_HasLabel (L[3]) &&
974 L[4]->OPC == OP65_ADC &&
975 L[4]->AM == AM65_ZP_INDY &&
976 !CE_HasLabel (L[4]) &&
977 (L[5]->OPC == OP65_BCC || L[5]->OPC == OP65_JCC) &&
979 L[5]->JumpTo->Owner == L[7] &&
980 !CE_HasLabel (L[5]) &&
981 L[6]->OPC == OP65_INX &&
982 !CE_HasLabel (L[6]) &&
983 L[7]->OPC == OP65_LDY &&
984 CE_KnownImm (L[7]) &&
986 CE_IsCallTo (L[8], "ldauidx") &&
987 !CE_HasLabel (L[8]) &&
988 /* Check the label last because this is quite costly */
989 (Len = strlen (L[0]->Arg)) > 3 &&
990 L[0]->Arg[0] == '<' &&
991 L[0]->Arg[1] == '(' &&
992 strlen (L[1]->Arg) == Len &&
993 L[1]->Arg[0] == '>' &&
994 memcmp (L[0]->Arg+1, L[1]->Arg+1, Len-1) == 0) {
1000 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, L[4]->Arg, 0, L[0]->LI);
1001 CS_InsertEntry (S, X, I+3);
1004 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, L[0]->LI);
1005 CS_InsertEntry (S, X, I+4);
1008 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI);
1009 CS_InsertEntry (S, X, I+5);
1012 Label = memcpy (xmalloc (Len-2), L[0]->Arg+2, Len-3);
1013 Label[Len-3] = '\0';
1014 X = NewCodeEntry (OP65_LDA, AM65_ABSY, Label, 0, L[0]->LI);
1015 CS_InsertEntry (S, X, I+6);
1018 /* Remove the old code */
1019 CS_DelEntries (S, I, 2);
1020 CS_DelEntries (S, I+5, 6);
1022 /* Remember, we had changes */
1032 /* Return the number of changes made */
1038 static unsigned OptPtrLoad6 (CodeSeg* S)
1039 /* Search for the sequence
1044 * and replace it by:
1052 * This step must be execute *after* OptPtrLoad1!
1055 unsigned Changes = 0;
1057 /* Walk over the entries */
1059 while (I < CS_GetEntryCount (S)) {
1063 /* Get next entry */
1064 L[0] = CS_GetEntry (S, I);
1066 /* Check for the sequence */
1067 if (L[0]->OPC == OP65_LDY &&
1068 CS_GetEntries (S, L+1, I+1, 1) &&
1069 CE_IsCallTo (L[1], "ldauidx") &&
1070 !CE_HasLabel (L[1])) {
1074 /* Store the high byte */
1075 X = NewCodeEntry (OP65_STA, AM65_ZP, "ptr1", 0, L[0]->LI);
1076 CS_InsertEntry (S, X, I+1);
1078 /* Store the low byte */
1079 X = NewCodeEntry (OP65_STX, AM65_ZP, "ptr1+1", 0, L[0]->LI);
1080 CS_InsertEntry (S, X, I+2);
1082 /* Delete the call to ldauidx */
1083 CS_DelEntry (S, I+3);
1085 /* Load the high and low byte */
1086 X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00", 0, L[0]->LI);
1087 CS_InsertEntry (S, X, I+3);
1088 X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "ptr1", 0, L[0]->LI);
1089 CS_InsertEntry (S, X, I+4);
1091 /* Remember, we had changes */
1101 /* Return the number of changes made */
1107 /*****************************************************************************/
1108 /* Decouple operations */
1109 /*****************************************************************************/
1113 static unsigned OptDecouple (CodeSeg* S)
1114 /* Decouple operations, that is, do the following replacements:
1124 * lda zp -> lda #imm
1125 * ldx zp -> ldx #imm
1126 * ldy zp -> ldy #imm
1128 * Provided that the register values are known of course.
1131 unsigned Changes = 0;
1134 /* Generate register info for the following step */
1137 /* Walk over the entries */
1139 while (I < CS_GetEntryCount (S)) {
1143 /* Get next entry and it's input register values */
1144 CodeEntry* E = CS_GetEntry (S, I);
1145 const RegContents* In = &E->RI->In;
1147 /* Assume we have no replacement */
1150 /* Check the instruction */
1154 if (E->RI->In.RegX >= 0) {
1155 Arg = MakeHexArg ((E->RI->In.RegX - 1) & 0xFF);
1156 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1161 if (E->RI->In.RegY >= 0) {
1162 Arg = MakeHexArg ((E->RI->In.RegY - 1) & 0xFF);
1163 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1168 if (E->RI->In.RegX >= 0) {
1169 Arg = MakeHexArg ((E->RI->In.RegX + 1) & 0xFF);
1170 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1175 if (E->RI->In.RegY >= 0) {
1176 Arg = MakeHexArg ((E->RI->In.RegY + 1) & 0xFF);
1177 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1182 if (E->AM == AM65_ZP) {
1183 switch (GetKnownReg (E->Use, In)) {
1185 Arg = MakeHexArg (In->Tmp1);
1186 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI);
1190 Arg = MakeHexArg (In->SRegLo);
1191 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI);
1195 Arg = MakeHexArg (In->SRegHi);
1196 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI);
1203 if (E->AM == AM65_ZP) {
1204 switch (GetKnownReg (E->Use, In)) {
1206 Arg = MakeHexArg (In->Tmp1);
1207 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1211 Arg = MakeHexArg (In->SRegLo);
1212 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1216 Arg = MakeHexArg (In->SRegHi);
1217 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1224 if (E->AM == AM65_ZP) {
1225 switch (GetKnownReg (E->Use, In)) {
1227 Arg = MakeHexArg (In->Tmp1);
1228 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1232 Arg = MakeHexArg (In->SRegLo);
1233 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1237 Arg = MakeHexArg (In->SRegHi);
1238 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1245 if (E->RI->In.RegA >= 0) {
1246 Arg = MakeHexArg (In->RegA);
1247 X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI);
1252 if (E->RI->In.RegA >= 0) {
1253 Arg = MakeHexArg (In->RegA);
1254 X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI);
1259 if (E->RI->In.RegX >= 0) {
1260 Arg = MakeHexArg (In->RegX);
1261 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI);
1266 if (E->RI->In.RegY >= 0) {
1267 Arg = MakeHexArg (In->RegY);
1268 X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI);
1273 /* Avoid gcc warnings */
1278 /* Insert the replacement if we have one */
1280 CS_InsertEntry (S, X, I+1);
1290 /* Free register info */
1293 /* Return the number of changes made */
1299 /*****************************************************************************/
1300 /* Size optimization */
1301 /*****************************************************************************/
1306 static unsigned OptSize1 (CodeSeg* S)
1307 /* Do size optimization by calling special subroutines that preload registers.
1308 * This routine does not work standalone, it needs a following register load
1312 typedef struct CallDesc CallDesc;
1314 const char* LongFunc; /* Long function name */
1315 short A, X, Y; /* Register contents */
1316 const char* ShortFunc; /* Short function name */
1319 static const CallDesc CallTable [] = {
1320 { "staxysp", -1, -1, 0, "stax0sp" },
1321 { "addeqysp", -1, -1, 0, "addeq0sp" },
1322 { "ldaxysp", -1, -1, 1, "ldax0sp" },
1323 { "ldeaxysp", -1, -1, 3, "ldeax0sp" },
1324 { "pushax", 0, 0, -1, "push0" },
1325 { "pushax", -1, 0, -1, "pusha0" },
1326 { "pushax", -1, 0xFF, -1, "pushaFF" },
1327 { "pushaysp", -1, -1, 0, "pusha0sp" },
1328 { "tosaddax", -1, 0, -1, "tosadda0" },
1329 { "tosandax", -1, 0, -1, "tosanda0" },
1330 { "tosdivax", -1, 0, -1, "tosdiva0" },
1331 { "toseqax", -1, 0, -1, "toseqa0" },
1332 { "tosgeax", -1, 0, -1, "tosgea0" },
1333 { "tosgtax", -1, 0, -1, "tosgta0" },
1334 { "laddeqysp", -1, -1, 0, "laddeq0sp" },
1335 { "ldaxidx", -1, -1, 1, "ldaxi" },
1336 { "ldeaxidx", -1, -1, 3, "ldeaxi" },
1337 { "ldeaxysp", -1, -1, 3, "ldeax0sp" },
1338 { "tosleax", -1, 0, -1, "toslea0" },
1339 { "lsubeqysp", -1, -1, 0, "lsubeq0sp" },
1341 "toslta0", /* tosltax, x = 0 */
1342 "tosmoda0", /* tosmodax, x = 0 */
1343 "tosmula0", /* tosmulax, x = 0 */
1344 "tosumula0", /* tosumulax, x = 0 */
1345 "tosnea0", /* tosneax, x = 0 */
1346 "tosora0", /* tosorax, x = 0 */
1347 "push1", /* pushax, x = 0, a = 1 */
1348 "push2", /* pushax, x = 0, a = 2 */
1349 "push3", /* pushax, x = 0, a = 3 */
1350 "push4", /* pushax, x = 0, a = 4 */
1351 "push5", /* pushax, x = 0, a = 5 */
1352 "push6", /* pushax, x = 0, a = 6 */
1353 "push7", /* pushax, x = 0, a = 7 */
1354 "pushc0", /* pusha, a = 0 */
1355 "pushc1", /* pusha, a = 1 */
1356 "pushc2", /* pusha, a = 2 */
1357 "tosrsuba0", /* tosrsubax, x = 0 */
1358 "tosshla0", /* tosshlax, x = 0 */
1359 "tosasla0", /* tosaslax, x = 0 */
1360 "tosshra0", /* tosshrax, x = 0 */
1361 "tosasra0", /* tosasrax, x = 0 */
1362 "steax0sp", /* steaxsp, y = 0 */
1363 "tossuba0", /* tossubax, x = 0 */
1364 "subeq0sp", /* subeqysp, y = 0 */
1365 "tosudiva0", /* tosudivax, x = 0 */
1366 "tosugea0", /* tosugeax, x = 0 */
1367 "tosugta0", /* tosugtax, x = 0 */
1368 "tosulea0", /* tosuleax, x = 0 */
1369 "tosulta0", /* tosultax, x = 0 */
1370 "tosumoda0", /* tosumodax, x = 0 */
1371 "tosxora0", /* tosxorax, x = 0 */
1373 "tosadd0ax", /* tosaddeax, sreg = 0 */
1374 "laddeqa", /* laddeq, sreg = 0, x = 0 */
1375 "laddeq1", /* laddeq, sreg = 0, x = 0, a = 1 */
1376 "tosand0ax", /* tosandeax, sreg = 0 */
1377 "tosdiv0ax", /* tosdiveax, sreg = 0 */
1378 "tosmod0ax", /* tosmodeax, sreg = 0 */
1379 "tosmul0ax", /* tosmuleax, sreg = 0 */
1380 "tosumul0ax", /* tosumuleax, sreg = 0 */
1381 "tosor0ax", /* tosoreax, sreg = 0 */
1382 "push0ax", /* pusheax, sreg = 0 */
1383 "tosrsub0ax", /* tosrsubeax, sreg = 0 */
1384 "tosshl0ax", /* tosshleax, sreg = 0 */
1385 "tosasl0ax", /* tosasleax, sreg = 0 */
1386 "tosshr0ax", /* tosshreax, sreg = 0 */
1387 "tosasr0ax", /* tosasreax, sreg = 0 */
1388 "tossub0ax", /* tossubeax, sreg = 0 */
1389 "lsubeqa", /* lsubeq, sreg = 0, x = 0 */
1390 "lsubeq1", /* lsubeq, sreg = 0, x = 0, a = 1 */
1391 "tosudiv0ax", /* tosudiveax, sreg = 0 */
1392 "tosumod0ax", /* tosumodeax, sreg = 0 */
1393 "tosxor0ax", /* tosxoreax, sreg = 0 */
1397 unsigned Changes = 0;
1400 /* Generate register info for the following step */
1403 /* Walk over the entries */
1405 while (I < CS_GetEntryCount (S)) {
1407 /* Get next entry */
1408 CodeEntry* E = CS_GetEntry (S, I);
1410 /* Check if it's a subroutine call */
1411 if (E->OPC == OP65_JSR) {
1413 /* Check for any of the known functions */
1424 /* Free register info */
1427 /* Return the number of changes made */
1434 static unsigned OptSize2 (CodeSeg* S)
1435 /* Do size optimization by using shorter code sequences, even if this
1436 * introduces relations between instructions. This step must be one of the
1437 * last steps, because it makes further work much more difficult.
1440 unsigned Changes = 0;
1443 /* Generate register info for the following step */
1446 /* Walk over the entries */
1448 while (I < CS_GetEntryCount (S)) {
1451 /* Get next entry */
1452 CodeEntry* E = CS_GetEntry (S, I);
1454 /* Assume we have no replacement */
1457 /* Check the instruction */
1461 if (CE_KnownImm (E)) {
1462 short Val = (short) E->Num;
1463 if (Val == E->RI->In.RegX) {
1464 X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, E->LI);
1465 } else if (Val == E->RI->In.RegY) {
1466 X = NewCodeEntry (OP65_TYA, AM65_IMP, 0, 0, E->LI);
1467 } else if (E->RI->In.RegA >= 0 && CPU >= CPU_65C02) {
1468 if (Val == ((E->RI->In.RegA - 1) & 0xFF)) {
1469 X = NewCodeEntry (OP65_DEA, AM65_IMP, 0, 0, E->LI);
1470 } else if (Val == ((E->RI->In.RegA + 1) & 0xFF)) {
1471 X = NewCodeEntry (OP65_INA, AM65_IMP, 0, 0, E->LI);
1478 if (CE_KnownImm (E)) {
1479 short Val = (short) E->Num;
1480 if (E->RI->In.RegX >= 0 && Val == ((E->RI->In.RegX - 1) & 0xFF)) {
1481 X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, E->LI);
1482 } else if (E->RI->In.RegX >= 0 && Val == ((E->RI->In.RegX + 1) & 0xFF)) {
1483 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI);
1484 } else if (Val == E->RI->In.RegA) {
1485 X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, E->LI);
1491 if (CE_KnownImm (E)) {
1492 short Val = (short) E->Num;
1493 if (E->RI->In.RegY >= 0 && Val == ((E->RI->In.RegY - 1) & 0xFF)) {
1494 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI);
1495 } else if (E->RI->In.RegY >= 0 && Val == ((E->RI->In.RegY + 1) & 0xFF)) {
1496 X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, E->LI);
1497 } else if (Val == E->RI->In.RegA) {
1498 X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, E->LI);
1504 /* Avoid gcc warnings */
1509 /* Insert the replacement if we have one */
1511 CS_InsertEntry (S, X, I+1);
1521 /* Free register info */
1524 /* Return the number of changes made */
1530 /*****************************************************************************/
1531 /* struct OptFunc */
1532 /*****************************************************************************/
1536 typedef struct OptFunc OptFunc;
1538 unsigned (*Func) (CodeSeg*); /* Optimizer function */
1539 const char* Name; /* Name of the function/group */
1540 unsigned CodeSizeFactor; /* Code size factor for this opt func */
1541 unsigned long TotalRuns; /* Total number of runs */
1542 unsigned long LastRuns; /* Last number of runs */
1543 unsigned long TotalChanges; /* Total number of changes */
1544 unsigned long LastChanges; /* Last number of changes */
1545 char Disabled; /* True if function disabled */
1550 /*****************************************************************************/
1552 /*****************************************************************************/
1556 /* A list of all the function descriptions */
1557 static OptFunc DOpt65C02BitOps = { Opt65C02BitOps, "Opt65C02BitOps", 66, 0, 0, 0, 0, 0 };
1558 static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind", 100, 0, 0, 0, 0, 0 };
1559 static OptFunc DOpt65C02Stores = { Opt65C02Stores, "Opt65C02Stores", 100, 0, 0, 0, 0, 0 };
1560 static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1", 125, 0, 0, 0, 0, 0 };
1561 static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2", 200, 0, 0, 0, 0, 0 };
1562 static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3", 40, 0, 0, 0, 0, 0 };
1563 static OptFunc DOptBoolTrans = { OptBoolTrans, "OptBoolTrans", 100, 0, 0, 0, 0, 0 };
1564 static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist", 0, 0, 0, 0, 0, 0 };
1565 static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1", 85, 0, 0, 0, 0, 0 };
1566 static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2", 75, 0, 0, 0, 0, 0 };
1567 static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3", 75, 0, 0, 0, 0, 0 };
1568 static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4", 100, 0, 0, 0, 0, 0 };
1569 static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5", 100, 0, 0, 0, 0, 0 };
1570 static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6", 85, 0, 0, 0, 0, 0 };
1571 static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7", 50, 0, 0, 0, 0, 0 };
1572 static OptFunc DOptCondBranches = { OptCondBranches, "OptCondBranches", 80, 0, 0, 0, 0, 0 };
1573 static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode", 100, 0, 0, 0, 0, 0 };
1574 static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps", 100, 0, 0, 0, 0, 0 };
1575 static OptFunc DOptDecouple = { OptDecouple, "OptDecouple", 100, 0, 0, 0, 0, 0 };
1576 static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads", 0, 0, 0, 0, 0, 0 };
1577 static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades", 100, 0, 0, 0, 0, 0 };
1578 static OptFunc DOptJumpTarget = { OptJumpTarget, "OptJumpTarget", 100, 0, 0, 0, 0, 0 };
1579 static OptFunc DOptRTS = { OptRTS, "OptRTS", 100, 0, 0, 0, 0, 0 };
1580 static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1", 100, 0, 0, 0, 0, 0 };
1581 static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2", 100, 0, 0, 0, 0, 0 };
1582 static OptFunc DOptNegA1 = { OptNegA1, "OptNegA1", 100, 0, 0, 0, 0, 0 };
1583 static OptFunc DOptNegA2 = { OptNegA2, "OptNegA2", 100, 0, 0, 0, 0, 0 };
1584 static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1", 100, 0, 0, 0, 0, 0 };
1585 static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2", 100, 0, 0, 0, 0, 0 };
1586 static OptFunc DOptNegAX3 = { OptNegAX3, "OptNegAX3", 100, 0, 0, 0, 0, 0 };
1587 static OptFunc DOptNegAX4 = { OptNegAX4, "OptNegAX4", 100, 0, 0, 0, 0, 0 };
1588 static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1", 100, 0, 0, 0, 0, 0 };
1589 static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2", 100, 0, 0, 0, 0, 0 };
1590 static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3", 100, 0, 0, 0, 0, 0 };
1591 static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4", 100, 0, 0, 0, 0, 0 };
1592 static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5", 100, 0, 0, 0, 0, 0 };
1593 static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6", 100, 0, 0, 0, 0, 0 };
1594 static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1", 100, 0, 0, 0, 0, 0 };
1595 static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2", 100, 0, 0, 0, 0, 0 };
1596 static OptFunc DOptPush1 = { OptPush1, "OptPush1", 65, 0, 0, 0, 0, 0 };
1597 static OptFunc DOptPushPop = { OptPushPop, "OptPushPop", 0, 0, 0, 0, 0, 0 };
1598 static OptFunc DOptShift1 = { OptShift1, "OptShift1", 100, 0, 0, 0, 0, 0 };
1599 static OptFunc DOptShift2 = { OptShift2, "OptShift2", 100, 0, 0, 0, 0, 0 };
1600 static OptFunc DOptShift3 = { OptShift3, "OptShift3", 110, 0, 0, 0, 0, 0 };
1601 /*static OptFunc DOptSize1 = { OptSize1, "OptSize1", 100, 0, 0, 0, 0, 0 };*/
1602 static OptFunc DOptSize2 = { OptSize2, "OptSize2", 100, 0, 0, 0, 0, 0 };
1603 static OptFunc DOptStackOps = { OptStackOps, "OptStackOps", 100, 0, 0, 0, 0, 0 };
1604 static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad", 0, 0, 0, 0, 0, 0 };
1605 static OptFunc DOptSub1 = { OptSub1, "OptSub1", 100, 0, 0, 0, 0, 0 };
1606 static OptFunc DOptSub2 = { OptSub2, "OptSub2", 100, 0, 0, 0, 0, 0 };
1607 static OptFunc DOptTest1 = { OptTest1, "OptTest1", 100, 0, 0, 0, 0, 0 };
1608 static OptFunc DOptTransfers = { OptTransfers, "OptTransfers", 0, 0, 0, 0, 0, 0 };
1609 static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads", 0, 0, 0, 0, 0, 0 };
1610 static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores", 0, 0, 0, 0, 0, 0 };
1613 /* Table containing all the steps in alphabetical order */
1614 static OptFunc* OptFuncs[] = {
1670 #define OPTFUNC_COUNT (sizeof(OptFuncs) / sizeof(OptFuncs[0]))
1674 static int CmpOptStep (const void* Key, const void* Func)
1675 /* Compare function for bsearch */
1677 return strcmp (Key, (*(const OptFunc**)Func)->Name);
1682 static OptFunc* FindOptFunc (const char* Name)
1683 /* Find an optimizer step by name in the table and return a pointer. Return
1684 * NULL if no such step is found.
1687 /* Search for the function in the list */
1688 OptFunc** O = bsearch (Name, OptFuncs, OPTFUNC_COUNT, sizeof (OptFuncs[0]), CmpOptStep);
1694 static OptFunc* GetOptFunc (const char* Name)
1695 /* Find an optimizer step by name in the table and return a pointer. Print an
1696 * error and call AbEnd if not found.
1699 /* Search for the function in the list */
1700 OptFunc* F = FindOptFunc (Name);
1703 AbEnd ("Optimization step `%s' not found", Name);
1710 void DisableOpt (const char* Name)
1711 /* Disable the optimization with the given name */
1713 if (strcmp (Name, "any") == 0) {
1715 for (I = 0; I < OPTFUNC_COUNT; ++I) {
1716 OptFuncs[I]->Disabled = 1;
1719 GetOptFunc(Name)->Disabled = 1;
1725 void EnableOpt (const char* Name)
1726 /* Enable the optimization with the given name */
1728 if (strcmp (Name, "any") == 0) {
1730 for (I = 0; I < OPTFUNC_COUNT; ++I) {
1731 OptFuncs[I]->Disabled = 0;
1734 GetOptFunc(Name)->Disabled = 0;
1740 void ListOptSteps (FILE* F)
1741 /* List all optimization steps */
1744 for (I = 0; I < OPTFUNC_COUNT; ++I) {
1745 fprintf (F, "%s\n", OptFuncs[I]->Name);
1751 static void ReadOptStats (const char* Name)
1752 /* Read the optimizer statistics file */
1757 /* Try to open the file */
1758 FILE* F = fopen (Name, "r");
1760 /* Ignore the error */
1764 /* Read and parse the lines */
1766 while (fgets (Buf, sizeof (Buf), F) != 0) {
1774 unsigned long TotalRuns;
1775 unsigned long TotalChanges;
1780 /* Remove trailing white space including the line terminator */
1783 while (Len > 0 && IsSpace (B[Len-1])) {
1788 /* Remove leading whitespace */
1789 while (IsSpace (*B)) {
1793 /* Check for empty and comment lines */
1794 if (*B == '\0' || *B == ';' || *B == '#') {
1798 /* Parse the line */
1799 if (sscanf (B, "%31s %lu %*u %lu %*u", Name, &TotalRuns, &TotalChanges) != 3) {
1804 /* Search for the optimizer step. */
1805 Func = FindOptFunc (Name);
1811 /* Found the step, set the fields */
1812 Func->TotalRuns = TotalRuns;
1813 Func->TotalChanges = TotalChanges;
1817 /* Close the file, ignore errors here. */
1823 static void WriteOptStats (const char* Name)
1824 /* Write the optimizer statistics file */
1828 /* Try to open the file */
1829 FILE* F = fopen (Name, "w");
1831 /* Ignore the error */
1835 /* Write a header */
1837 "; Optimizer Total Last Total Last\n"
1838 "; Step Runs Runs Chg Chg\n");
1841 /* Write the data */
1842 for (I = 0; I < OPTFUNC_COUNT; ++I) {
1843 const OptFunc* O = OptFuncs[I];
1845 "%-20s %6lu %6lu %6lu %6lu\n",
1853 /* Close the file, ignore errors here. */
1859 static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max)
1860 /* Run one optimizer function Max times or until there are no more changes */
1862 unsigned Changes, C;
1864 /* Don't run the function if it is disabled or if it is prohibited by the
1867 if (F->Disabled || CodeSizeFactor < F->CodeSizeFactor) {
1871 /* Run this until there are no more changes */
1875 /* Run the function */
1882 F->TotalChanges += C;
1883 F->LastChanges += C;
1885 } while (--Max && C > 0);
1887 /* Return the number of changes */
1893 static unsigned RunOptGroup1 (CodeSeg* S)
1894 /* Run the first group of optimization steps. These steps translate known
1895 * patterns emitted by the code generator into more optimal patterns. Order
1896 * of the steps is important, because some of the steps done earlier cover
1897 * the same patterns as later steps as subpatterns.
1900 unsigned Changes = 0;
1902 Changes += RunOptFunc (S, &DOptPtrStore1, 1);
1903 Changes += RunOptFunc (S, &DOptPtrStore2, 1);
1904 Changes += RunOptFunc (S, &DOptPtrLoad1, 1);
1905 Changes += RunOptFunc (S, &DOptPtrLoad2, 1);
1906 Changes += RunOptFunc (S, &DOptPtrLoad3, 1);
1907 Changes += RunOptFunc (S, &DOptPtrLoad4, 1);
1908 Changes += RunOptFunc (S, &DOptPtrLoad5, 1);
1909 Changes += RunOptFunc (S, &DOptNegAX1, 1);
1910 Changes += RunOptFunc (S, &DOptNegAX2, 1);
1911 Changes += RunOptFunc (S, &DOptNegAX3, 1);
1912 Changes += RunOptFunc (S, &DOptNegAX4, 1);
1913 Changes += RunOptFunc (S, &DOptAdd1, 1);
1914 Changes += RunOptFunc (S, &DOptAdd2, 1);
1915 Changes += RunOptFunc (S, &DOptShift1, 1);
1916 Changes += RunOptFunc (S, &DOptShift2, 1);
1917 Changes += RunOptFunc (S, &DOptShift3, 1);
1919 /* Return the number of changes */
1925 static unsigned RunOptGroup2 (CodeSeg* S)
1926 /* Run one group of optimization steps. This step involves just decoupling
1927 * instructions by replacing them by instructions that do not depend on
1928 * previous instructions. This makes it easier to find instructions that
1932 unsigned Changes = 0;
1934 Changes += RunOptFunc (S, &DOptDecouple, 1);
1936 /* Return the number of changes */
1942 static unsigned RunOptGroup3 (CodeSeg* S)
1943 /* Run one group of optimization steps. These steps depend on each other,
1944 * that means that one step may allow another step to do additional work,
1945 * so we will repeat the steps as long as we see any changes.
1948 unsigned Changes, C;
1954 C += RunOptFunc (S, &DOptPtrLoad6, 1);
1955 C += RunOptFunc (S, &DOptNegA1, 1);
1956 C += RunOptFunc (S, &DOptNegA2, 1);
1957 C += RunOptFunc (S, &DOptSub1, 1);
1958 C += RunOptFunc (S, &DOptSub2, 1);
1959 C += RunOptFunc (S, &DOptAdd3, 1);
1960 C += RunOptFunc (S, &DOptStackOps, 1);
1961 C += RunOptFunc (S, &DOptJumpCascades, 1);
1962 C += RunOptFunc (S, &DOptDeadJumps, 1);
1963 C += RunOptFunc (S, &DOptRTS, 1);
1964 C += RunOptFunc (S, &DOptDeadCode, 1);
1965 C += RunOptFunc (S, &DOptJumpTarget, 1);
1966 C += RunOptFunc (S, &DOptCondBranches, 1);
1967 C += RunOptFunc (S, &DOptRTSJumps1, 1);
1968 C += RunOptFunc (S, &DOptBoolTrans, 1);
1969 C += RunOptFunc (S, &DOptCmp1, 1);
1970 C += RunOptFunc (S, &DOptCmp2, 1);
1971 C += RunOptFunc (S, &DOptCmp3, 1);
1972 C += RunOptFunc (S, &DOptCmp4, 1);
1973 C += RunOptFunc (S, &DOptCmp5, 1);
1974 C += RunOptFunc (S, &DOptCmp6, 1);
1975 C += RunOptFunc (S, &DOptCmp7, 1);
1976 C += RunOptFunc (S, &DOptTest1, 1);
1977 C += RunOptFunc (S, &DOptUnusedLoads, 1);
1978 C += RunOptFunc (S, &DOptUnusedStores, 1);
1979 C += RunOptFunc (S, &DOptDupLoads, 1);
1980 C += RunOptFunc (S, &DOptStoreLoad, 1);
1981 C += RunOptFunc (S, &DOptTransfers, 1);
1982 C += RunOptFunc (S, &DOptPushPop, 1);
1988 /* Return the number of changes */
1994 static unsigned RunOptGroup4 (CodeSeg* S)
1995 /* 65C02 specific optimizations. */
1997 unsigned Changes = 0;
1999 if (CPU >= CPU_65C02) {
2000 Changes += RunOptFunc (S, &DOpt65C02BitOps, 1);
2001 Changes += RunOptFunc (S, &DOpt65C02Ind, 1);
2002 Changes += RunOptFunc (S, &DOpt65C02Stores, 1);
2004 /* The 65C02 replacement codes do often make the use of a register
2005 * value unnecessary, so if we have changes, run another load
2008 Changes += RunOptFunc (S, &DOptUnusedLoads, 1);
2012 /* Return the number of changes */
2018 static unsigned RunOptGroup5 (CodeSeg* S)
2019 /* Run another round of pattern replacements. These are done late, since there
2020 * may be better replacements before.
2023 unsigned Changes = 0;
2025 Changes += RunOptFunc (S, &DOptPush1, 1);
2027 /* Return the number of changes */
2033 static unsigned RunOptGroup6 (CodeSeg* S)
2034 /* The last group of optimization steps. Adjust branches, do size optimizations.
2037 unsigned Changes = 0;
2040 /* Optimize for size, that is replace operations by shorter ones, even
2041 * if this does hinder further optimizations (no problem since we're
2044 Changes += RunOptFunc (S, &DOptSize2, 1);
2046 /* Run the jump target optimization again, since the size optimization
2047 * above may have opened new oportunities.
2049 Changes += RunOptFunc (S, &DOptJumpTarget, 5);
2051 /* Adjust branch distances */
2052 Changes += RunOptFunc (S, &DOptBranchDist, 3);
2054 /* Replace conditional branches to RTS. If we had changes, we must run dead
2055 * code elimination again, since the change may have introduced dead code.
2057 C = RunOptFunc (S, &DOptRTSJumps2, 1);
2060 Changes += RunOptFunc (S, &DOptDeadCode, 1);
2063 /* Return the number of changes */
2069 void RunOpt (CodeSeg* S)
2070 /* Run the optimizer */
2072 const char* StatFileName;
2074 /* If we shouldn't run the optimizer, bail out */
2079 /* Check if we are requested to write optimizer statistics */
2080 StatFileName = getenv ("CC65_OPTSTATS");
2082 ReadOptStats (StatFileName);
2085 /* Print the name of the function we are working on */
2087 Print (stdout, 1, "Running optimizer for function `%s'\n", S->Func->Name);
2089 Print (stdout, 1, "Running optimizer for global code segment\n");
2092 /* Run groups of optimizations */
2100 /* Write statistics */
2102 WriteOptStats (StatFileName);