1 /*****************************************************************************/
9 /* (C) 2001-2012, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
42 #include "coptshift.h"
46 /*****************************************************************************/
48 /*****************************************************************************/
52 /* Shift types. Shift type is in the first byte, shift count in the second */
57 SHIFT_MASK_COUNT = 0x00FF,
58 SHIFT_MASK_DIR = 0x0F00,
59 SHIFT_MASK_MODE = 0xF000, /* Arithmetic or logical */
60 SHIFT_MASK_TYPE = SHIFT_MASK_DIR | SHIFT_MASK_MODE,
63 SHIFT_COUNT_Y = 0x0000, /* Count is in Y register */
64 SHIFT_COUNT_1 = 0x0001,
65 SHIFT_COUNT_2 = 0x0002,
66 SHIFT_COUNT_3 = 0x0003,
67 SHIFT_COUNT_4 = 0x0004,
68 SHIFT_COUNT_5 = 0x0005,
69 SHIFT_COUNT_6 = 0x0006,
70 SHIFT_COUNT_7 = 0x0007,
72 /* Shift directions */
73 SHIFT_DIR_LEFT = 0x0100,
74 SHIFT_DIR_RIGHT = 0x0200,
77 SHIFT_MODE_ARITH = 0x1000,
78 SHIFT_MODE_LOGICAL = 0x2000,
81 SHIFT_TYPE_ASL = SHIFT_DIR_LEFT | SHIFT_MODE_ARITH,
82 SHIFT_TYPE_ASR = SHIFT_DIR_RIGHT | SHIFT_MODE_ARITH,
83 SHIFT_TYPE_LSL = SHIFT_DIR_LEFT | SHIFT_MODE_LOGICAL,
84 SHIFT_TYPE_LSR = SHIFT_DIR_RIGHT | SHIFT_MODE_LOGICAL,
87 SHIFT_ASL_Y = SHIFT_TYPE_ASL | SHIFT_COUNT_Y,
88 SHIFT_ASR_Y = SHIFT_TYPE_ASR | SHIFT_COUNT_Y,
89 SHIFT_LSL_Y = SHIFT_TYPE_LSL | SHIFT_COUNT_Y,
90 SHIFT_LSR_Y = SHIFT_TYPE_LSR | SHIFT_COUNT_Y,
92 SHIFT_ASL_1 = SHIFT_TYPE_ASL | SHIFT_COUNT_1,
93 SHIFT_ASR_1 = SHIFT_TYPE_ASR | SHIFT_COUNT_1,
94 SHIFT_LSL_1 = SHIFT_TYPE_LSL | SHIFT_COUNT_1,
95 SHIFT_LSR_1 = SHIFT_TYPE_LSR | SHIFT_COUNT_1,
97 SHIFT_ASL_2 = SHIFT_TYPE_ASL | SHIFT_COUNT_2,
98 SHIFT_ASR_2 = SHIFT_TYPE_ASR | SHIFT_COUNT_2,
99 SHIFT_LSL_2 = SHIFT_TYPE_LSL | SHIFT_COUNT_2,
100 SHIFT_LSR_2 = SHIFT_TYPE_LSR | SHIFT_COUNT_2,
102 SHIFT_ASL_3 = SHIFT_TYPE_ASL | SHIFT_COUNT_3,
103 SHIFT_ASR_3 = SHIFT_TYPE_ASR | SHIFT_COUNT_3,
104 SHIFT_LSL_3 = SHIFT_TYPE_LSL | SHIFT_COUNT_3,
105 SHIFT_LSR_3 = SHIFT_TYPE_LSR | SHIFT_COUNT_3,
107 SHIFT_ASL_4 = SHIFT_TYPE_ASL | SHIFT_COUNT_4,
108 SHIFT_ASR_4 = SHIFT_TYPE_ASR | SHIFT_COUNT_4,
109 SHIFT_LSL_4 = SHIFT_TYPE_LSL | SHIFT_COUNT_4,
110 SHIFT_LSR_4 = SHIFT_TYPE_LSR | SHIFT_COUNT_4,
112 SHIFT_ASL_5 = SHIFT_TYPE_ASL | SHIFT_COUNT_5,
113 SHIFT_ASR_5 = SHIFT_TYPE_ASR | SHIFT_COUNT_5,
114 SHIFT_LSL_5 = SHIFT_TYPE_LSL | SHIFT_COUNT_5,
115 SHIFT_LSR_5 = SHIFT_TYPE_LSR | SHIFT_COUNT_5,
117 SHIFT_ASL_6 = SHIFT_TYPE_ASL | SHIFT_COUNT_6,
118 SHIFT_ASR_6 = SHIFT_TYPE_ASR | SHIFT_COUNT_6,
119 SHIFT_LSL_6 = SHIFT_TYPE_LSL | SHIFT_COUNT_6,
120 SHIFT_LSR_6 = SHIFT_TYPE_LSR | SHIFT_COUNT_6,
122 SHIFT_ASL_7 = SHIFT_TYPE_ASL | SHIFT_COUNT_7,
123 SHIFT_ASR_7 = SHIFT_TYPE_ASR | SHIFT_COUNT_7,
124 SHIFT_LSL_7 = SHIFT_TYPE_LSL | SHIFT_COUNT_7,
125 SHIFT_LSR_7 = SHIFT_TYPE_LSR | SHIFT_COUNT_7,
130 /* Macros to extract values from a shift type */
131 #define SHIFT_COUNT(S) ((S) & SHIFT_MASK_COUNT)
132 #define SHIFT_DIR(S) ((S) & SHIFT_MASK_DIR)
133 #define SHIFT_MODE(S) ((S) & SHIFT_MASK_MODE)
134 #define SHIFT_TYPE(S) ((S) & SHIFT_MASK_TYPE)
138 /*****************************************************************************/
139 /* Helper routines */
140 /*****************************************************************************/
144 static unsigned GetShift (const char* Name)
145 /* Determine the shift from the name of the subroutine */
149 if (strncmp (Name, "aslax", 5) == 0) {
150 Type = SHIFT_TYPE_ASL;
151 } else if (strncmp (Name, "asrax", 5) == 0) {
152 Type = SHIFT_TYPE_ASR;
153 } else if (strncmp (Name, "shlax", 5) == 0) {
154 Type = SHIFT_TYPE_LSL;
155 } else if (strncmp (Name, "shrax", 5) == 0) {
156 Type = SHIFT_TYPE_LSR;
158 /* Nothing we know */
164 case 'y': Type |= SHIFT_COUNT_Y; break;
165 case '1': Type |= SHIFT_COUNT_1; break;
166 case '2': Type |= SHIFT_COUNT_2; break;
167 case '3': Type |= SHIFT_COUNT_3; break;
168 case '4': Type |= SHIFT_COUNT_4; break;
169 case '5': Type |= SHIFT_COUNT_5; break;
170 case '6': Type |= SHIFT_COUNT_6; break;
171 case '7': Type |= SHIFT_COUNT_7; break;
172 default: return SHIFT_NONE;
175 /* Make sure nothing follows */
176 if (Name[6] == '\0') {
185 /*****************************************************************************/
186 /* Optimize shifts */
187 /*****************************************************************************/
191 unsigned OptShift1 (CodeSeg* S)
192 /* A call to the shlaxN routine may get replaced by one or more asl insns
193 ** if the value of X is not used later. If X is used later, but it is zero
194 ** on entry and it's a shift by one, it may get replaced by:
202 unsigned Changes = 0;
205 /* Walk over the entries */
207 while (I < CS_GetEntryCount (S)) {
215 CodeEntry* E = CS_GetEntry (S, I);
217 /* Check for the sequence */
218 if (E->OPC == OP65_JSR &&
219 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
220 SHIFT_DIR (Shift) == SHIFT_DIR_LEFT) {
223 unsigned Count = SHIFT_COUNT (Shift);
224 if (!RegXUsed (S, I+1)) {
226 if (Count == SHIFT_COUNT_Y) {
230 if (S->CodeSizeFactor < 200) {
243 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
244 CS_InsertEntry (S, X, I+1);
245 L = CS_GenLabel (S, X);
248 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI);
249 CS_InsertEntry (S, X, I+2);
252 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI);
253 CS_InsertEntry (S, X, I+3);
256 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, E->LI);
257 CS_InsertEntry (S, X, I+4);
260 /* Insert shift insns */
262 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
263 CS_InsertEntry (S, X, I+1);
267 } else if (E->RI->In.RegX == 0 &&
269 (N = CS_GetNextEntry (S, I)) != 0) {
272 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
273 CS_InsertEntry (S, X, I+1);
276 L = CS_GenLabel (S, N);
277 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, E->LI);
278 CS_InsertEntry (S, X, I+2);
281 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI);
282 CS_InsertEntry (S, X, I+3);
286 /* We won't handle this one */
291 /* Delete the call to shlax */
294 /* Remember, we had changes */
304 /* Return the number of changes made */
310 unsigned OptShift2 (CodeSeg* S)
317 ** might be replaced by N copies of
322 ** if X is not used later (X is assumed to be zero on entry).
323 ** If the sequence is followed immediately by another
327 ** then their shifts are combined.
330 unsigned Changes = 0;
333 /* Walk over the entries */
334 while (I < CS_GetEntryCount (S)) {
336 unsigned Count, Count2;
341 L[0] = CS_GetEntry (S, I);
343 /* Check for the sequence */
344 if (L[0]->OPC == OP65_BPL &&
346 CS_GetEntries (S, L+1, I+1, 3) &&
347 L[1]->OPC == OP65_DEX &&
348 L[0]->JumpTo->Owner == L[2] &&
349 !CS_RangeHasLabel (S, I, 2) &&
350 L[2]->OPC == OP65_JSR &&
351 SHIFT_TYPE (Shift = GetShift (L[2]->Arg)) == SHIFT_TYPE_ASR &&
352 (Count = SHIFT_COUNT (Shift)) > 0) {
354 if (L[3]->OPC == OP65_JSR &&
355 SHIFT_TYPE (Shift = GetShift (L[3]->Arg)) == SHIFT_TYPE_ASR &&
356 (Count2 = SHIFT_COUNT (Shift)) > 0) {
358 /* Found a second jsr asraxN */
364 if (Count * 100 <= S->CodeSizeFactor &&
365 !RegXUsed (S, I+K)) {
370 /* Generate the replacement sequence */
373 X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI);
374 CS_InsertEntry (S, X, J++);
377 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
378 CS_InsertEntry (S, X, J++);
381 /* Remove the bpl/dex/jsr */
382 CS_DelEntries (S, I, K);
384 /* Remember, we had changes */
393 /* Return the number of changes made */
399 unsigned OptShift3 (CodeSeg* S)
406 ** may get replaced by
410 ** if X is zero on entry. For shift counts > 1, more
417 unsigned Changes = 0;
420 /* Walk over the entries */
422 while (I < CS_GetEntryCount (S)) {
429 L[0] = CS_GetEntry (S, I);
431 /* Check for the sequence */
432 if ((L[0]->OPC == OP65_BCC || L[0]->OPC == OP65_JCC) &&
434 L[0]->RI->In.RegX == 0 &&
435 CS_GetEntries (S, L+1, I+1, 2) &&
436 L[1]->OPC == OP65_INX &&
437 L[0]->JumpTo->Owner == L[2] &&
438 !CS_RangeHasLabel (S, I, 2) &&
439 L[2]->OPC == OP65_JSR &&
440 (Shift = GetShift (L[2]->Arg)) != SHIFT_NONE &&
441 SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT &&
442 (Count = SHIFT_COUNT (Shift)) > 0) {
444 /* Add the replacement insn instead */
445 CodeEntry* X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
446 CS_InsertEntry (S, X, I+3);
448 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI);
449 CS_InsertEntry (S, X, I+4);
452 /* Remove the bcc/inx/jsr */
453 CS_DelEntries (S, I, 3);
455 /* Remember, we had changes */
465 /* Return the number of changes made */
471 unsigned OptShift4 (CodeSeg* S)
472 /* Calls to the asraxN or shraxN routines may get replaced by one or more lsr
473 ** insns if the value of X is zero.
476 unsigned Changes = 0;
479 /* Walk over the entries */
481 while (I < CS_GetEntryCount (S)) {
487 CodeEntry* E = CS_GetEntry (S, I);
489 /* Check for the sequence */
490 if (E->OPC == OP65_JSR &&
491 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
492 SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT &&
493 E->RI->In.RegX == 0) {
497 /* Shift count may be in Y */
498 Count = SHIFT_COUNT (Shift);
499 if (Count == SHIFT_COUNT_Y) {
503 if (S->CodeSizeFactor < 200) {
515 ** A negative shift count or one that is greater or equal than
516 ** the bit width of the left operand (which is promoted to
517 ** integer before the operation) causes undefined behaviour, so
518 ** above transformation is safe.
522 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI);
523 CS_InsertEntry (S, X, I+1);
524 L = CS_GenLabel (S, X);
527 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI);
528 CS_InsertEntry (S, X, I+2);
531 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI);
532 CS_InsertEntry (S, X, I+3);
535 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, E->LI);
536 CS_InsertEntry (S, X, I+4);
539 /* Insert shift insns */
541 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI);
542 CS_InsertEntry (S, X, I+1);
547 /* Delete the call to shrax */
550 /* Remember, we had changes */
561 /* Return the number of changes made */
567 unsigned OptShift5 (CodeSeg* S)
568 /* Search for the sequence
572 ** jsr aslax1/asrax1/shlax1/shrax1
585 ** or similar, provided that a/x is not used later
588 unsigned Changes = 0;
590 /* Walk over the entries */
592 while (I < CS_GetEntryCount (S)) {
598 L[0] = CS_GetEntry (S, I);
600 /* Check for the sequence */
601 if (L[0]->OPC == OP65_LDA &&
602 (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) &&
603 CS_GetEntries (S, L+1, I+1, 4) &&
604 !CS_RangeHasLabel (S, I+1, 4) &&
605 L[1]->OPC == OP65_LDX &&
606 (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) &&
607 L[2]->OPC == OP65_JSR &&
608 (ShiftType = GetShift (L[2]->Arg)) != SHIFT_NONE &&
609 SHIFT_COUNT(ShiftType) == 1 &&
610 L[3]->OPC == OP65_STA &&
611 (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) &&
612 L[4]->OPC == OP65_STX &&
613 (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) &&
614 !RegAXUsed (S, I+5)) {
618 /* Handle the four shift types differently */
622 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
623 CS_InsertEntry (S, X, I+5);
624 X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI);
625 CS_InsertEntry (S, X, I+6);
626 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
627 CS_InsertEntry (S, X, I+7);
628 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
629 CS_InsertEntry (S, X, I+8);
630 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
631 CS_InsertEntry (S, X, I+9);
632 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
633 CS_InsertEntry (S, X, I+10);
634 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
635 CS_InsertEntry (S, X, I+11);
636 CS_DelEntries (S, I, 5);
640 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
641 CS_InsertEntry (S, X, I+5);
642 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI);
643 CS_InsertEntry (S, X, I+6);
644 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
645 CS_InsertEntry (S, X, I+7);
646 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
647 CS_InsertEntry (S, X, I+8);
648 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
649 CS_InsertEntry (S, X, I+9);
650 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
651 CS_InsertEntry (S, X, I+10);
652 CS_DelEntries (S, I, 5);
657 /* These two are identical */
658 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI);
659 CS_InsertEntry (S, X, I+1);
660 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
661 CS_InsertEntry (S, X, I+2);
662 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
663 CS_InsertEntry (S, X, I+3);
664 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI);
665 CS_InsertEntry (S, X, I+4);
666 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
667 CS_InsertEntry (S, X, I+5);
668 CS_DelEntries (S, I+6, 4);
673 /* Remember, we had changes */
683 /* Return the number of changes made */
689 unsigned OptShift6 (CodeSeg* S)
690 /* Inline the shift subroutines. */
692 unsigned Changes = 0;
694 /* Walk over the entries */
696 while (I < CS_GetEntryCount (S)) {
704 CodeEntry* E = CS_GetEntry (S, I);
706 /* Check for a call to one of the shift routine */
707 if (E->OPC == OP65_JSR &&
708 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
709 SHIFT_DIR (Shift) == SHIFT_DIR_LEFT &&
710 (Count = SHIFT_COUNT (Shift)) > 0) {
717 ** (repeat ShiftCount-1 times)
720 ** which makes 4 + 3 * ShiftCount bytes, compared to the original
721 ** 3 bytes for the subroutine call. However, in most cases, the
722 ** final load of the X register gets merged with some other insn
723 ** and replaces a txa, so for a shift count of 1, we get a factor
724 ** of 200, which matches nicely the CodeSizeFactor enabled with -Oi
726 if (Count > 1 || S->CodeSizeFactor > 200) {
727 unsigned Size = 4 + 3 * Count;
728 if ((Size * 100 / 3) > S->CodeSizeFactor) {
734 /* Inline the code. Insertion point is behind the subroutine call */
738 X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1", 0, E->LI);
739 CS_InsertEntry (S, X, IP++);
743 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
744 CS_InsertEntry (S, X, IP++);
747 X = NewCodeEntry (OP65_ROL, AM65_ZP, "tmp1", 0, E->LI);
748 CS_InsertEntry (S, X, IP++);
752 X = NewCodeEntry (OP65_LDX, AM65_ZP, "tmp1", 0, E->LI);
753 CS_InsertEntry (S, X, IP++);
755 /* Remove the subroutine call */
758 /* Remember, we had changes */
768 /* Return the number of changes made */