# we win

# need to ask eric how to schedule the 16 and 24 bit quadruples

;-----------------------------------------------------------------------
; Exported symbol and equates used by .quadruple_screen
;-----------------------------------------------------------------------
        EXPORT  .quadruple_screen               ; export the code symbol

; Values compared against the flags halfword of the parameter block.
; kEveryOtherLine is defined here but is not referenced by any
; instruction visible in this file.
kEveryOtherLine:        EQU     0x8000
k16BitMode:             EQU     0x4000
k32BitMode:             EQU     0x2000
kVerticalOnlyMode:      EQU     0x1000

; Byte offsets into the parameter block addressed by rParamPtr.
; source and dest are read as 32-bit words (lwz); every other field is
; read as an unsigned 16-bit halfword (lhz).
source:                 EQU     0               ; source base address
dest:                   EQU     4               ; destination base address
width:                  EQU     8               ; per-scanline count (loaded into CTR)
height:                 EQU     10              ; number of source scanlines
bytes_per_row:          EQU     12              ; added to dest to form the second row pointer
src_slop:               EQU     14              ; added to the source pointer after each scanline
dst_slop:               EQU     16              ; added to both dest pointers after each scanline
flags:                  EQU     18              ; mode selector, see the k... values

; Integer register assignments.
rParamPtr:              EQU     r3              ; incoming parameter-block pointer
rSrcPtr:                EQU     r4
rDstPtr1:               EQU     r5              ; first destination row
rDstPtr2:               EQU     r6              ; second destination row
rHeight:                EQU     r7
rWidth:                 EQU     r8
rDstSlop:               EQU     r10
rSrcSlop:               EQU     r11

; Scratch registers.
rTemp1:                 EQU     r9
rTemp2:                 EQU     r12
rTemp3:                 EQU     r3              ; WARNING: same register as rParamPtr.
                                                ; Every parameter must be loaded before
                                                ; rTemp3 is written.

; Floating-point registers used to move 8 bytes per store.
fpTemp1:                EQU     fp0
fpTemp2:                EQU     fp1

;-----------------------------------------------------------------------
; .quadruple_screen
;
; Copies a source pixel buffer to a destination buffer, writing every
; source scanline to two destination rows (dest and dest+bytes_per_row).
; On the "quad" paths each pixel is also doubled horizontally, so one
; source long becomes one 8-byte double per destination row.  On the
; "duo" path the source bytes are copied unchanged, 16 bytes per
; iteration.
;
; In:    r3 (rParamPtr) = address of the parameter block described by
;        the source/dest/width/height/bytes_per_row/src_slop/dst_slop/
;        flags equates.
; Out:   no register result.
; Uses:  r3-r12, fp0, fp1, CTR, CR0, XER[CA] (from subic.), and the
;        8 bytes at -8(SP)..-1(SP) as scratch to move two GPR words
;        into one FPR.
;
; Dispatch is an exact match on the flags halfword:
;        k16BitMode                       -> @quad16bit
;        k16BitMode|kVerticalOnlyMode     -> @duo
;        k32BitMode                       -> @quad32bit
;        k32BitMode|kVerticalOnlyMode     -> @duo
;        kVerticalOnlyMode                -> @duo
;        anything else                    -> @quad8bit
;
; NOTE(review): the quad paths load width-1 into CTR and the duo path
; loads width>>2; bdnz decrements CTR before testing it, so a CTR of 0
; wraps instead of terminating.  The quad paths therefore need
; width >= 2, the duo path needs width >= 4, and every path needs
; height >= 1.  Nothing here checks this - confirm against the callers.
;-----------------------------------------------------------------------
.quadruple_screen:
        lwz     rSrcPtr, source(rParamPtr)
        subi    rSrcPtr, rSrcPtr, 4             ; pre-bias for lwzu 4(rSrcPtr)

        lwz     rDstPtr1, dest(rParamPtr)
        subi    rDstPtr1, rDstPtr1, 8           ; pre-bias for stfdu 8(rDstPtr1)
        lhz     rDstPtr2, bytes_per_row(rParamPtr)
        add     rDstPtr2, rDstPtr2, rDstPtr1    ; second row = first row + bytes_per_row

        lhz     rHeight, height(rParamPtr)

        lhz     rDstSlop, dst_slop(rParamPtr)

        lhz     rSrcSlop, src_slop(rParamPtr)

        lhz     rTemp1, flags(rParamPtr)

        cmpi    0, 0, rTemp1, k16BitMode
        beq     @quad16bit
        cmpi    0, 0, rTemp1, k16BitMode|kVerticalOnlyMode
        beq     @duo

        cmpi    0, 0, rTemp1, k32BitMode
        beq     @quad32bit
        ; This test used to repeat k16BitMode|kVerticalOnlyMode, so the
        ; 32-bit vertical-only combination fell through to @quad8bit.
        cmpi    0, 0, rTemp1, k32BitMode|kVerticalOnlyMode
        beq     @duo

        cmpi    0, 0, rTemp1, kVerticalOnlyMode
        beq     @duo

;-----------------------------------------------------------------------
; 8-bit pixels: source long ABCD becomes the double AABBCCDD.
; width must be read here, before rTemp3 (the same register as
; rParamPtr) is overwritten.
;-----------------------------------------------------------------------
@quad8bit:
        lhz     rWidth, width(rParamPtr)
        subi    rWidth, rWidth, 1               ; first long is handled outside the loop

@quad8bit_start:
        lwzu    rTemp1, 4(rSrcPtr)              ; load first src long of the scanline
        mr      rTemp2, rTemp1                  ; working copy for the high word
        mr      rTemp3, rTemp1                  ; working copy for the low word (clobbers r3)
        inslwi  rTemp2, rTemp1, 16, 8           ; ABCD -> AABD
        insrwi  rTemp3, rTemp1, 16, 8           ; ABCD -> ACDD
        rlwimi  rTemp2, rTemp1, 16, 24, 31      ; AABD -> AABB
        stw     rTemp2, -8(SP)                  ; store high half into redzone
        rlwimi  rTemp3, rTemp1, 16, 0, 7        ; ACDD -> CCDD
        stw     rTemp3, -4(SP)                  ; store low half into redzone
        mtctr   rWidth
        lfd     fpTemp1, -8(SP)                 ; load double from redzone

        ; Each pass stores the double built on the previous pass while
        ; expanding the next source long.
@quad8bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)              ; load next src long
        stfdu   fpTemp1, 8(rDstPtr1)            ; previous double -> first row
        mr      rTemp2, rTemp1                  ; working copy for the high word
        mr      rTemp3, rTemp1                  ; working copy for the low word
        inslwi  rTemp2, rTemp1, 16, 8
        insrwi  rTemp3, rTemp1, 16, 8
        rlwimi  rTemp2, rTemp1, 16, 24, 31
        stw     rTemp2, -8(SP)                  ; store high half into redzone
        rlwimi  rTemp3, rTemp1, 16, 0, 7
        stw     rTemp3, -4(SP)                  ; store low half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)            ; previous double -> second row
        lfd     fpTemp1, -8(SP)                 ; load double from redzone
        bdnz    @quad8bit_loop

        stfdu   fpTemp1, 8(rDstPtr1)            ; flush last double -> first row
        subic.  rHeight, rHeight, 1             ; we've done one scanline

        add     rSrcPtr, rSrcPtr, rSrcSlop      ; step source by src_slop
        add     rDstPtr1, rDstPtr1, rDstSlop    ; step first row by dst_slop

        stfdu   fpTemp1, 8(rDstPtr2)            ; flush last double -> second row
        add     rDstPtr2, rDstPtr2, rDstSlop    ; step second row by dst_slop
        bne     @quad8bit_start                 ; loop for all height

        blr                                     ; outta here

;-----------------------------------------------------------------------
; Vertical-only: copy 16 source bytes per iteration, unchanged, to both
; destination rows.
;-----------------------------------------------------------------------
@duo:
        subi    rSrcPtr, rSrcPtr, 4             ; total bias is now -8, for lfdu 16(rSrcPtr)
        lhz     rWidth, width(rParamPtr)
        srwi    rWidth, rWidth, 2               ; we're doing 16 bytes at a time

@duo_start:
        mtctr   rWidth

@duo_loop:
        lfd     fpTemp1, 8(rSrcPtr)             ; first 8 source bytes
        lfdu    fpTemp2, 16(rSrcPtr)            ; next 8 source bytes, advance source by 16
        stfd    fpTemp1, 8(rDstPtr1)
        stfd    fpTemp1, 8(rDstPtr2)
        stfdu   fpTemp2, 16(rDstPtr1)           ; advance first row by 16
        stfdu   fpTemp2, 16(rDstPtr2)           ; advance second row by 16
        bdnz    @duo_loop

        subic.  rHeight, rHeight, 1             ; we've done one scanline

        add     rSrcPtr, rSrcPtr, rSrcSlop      ; step source by src_slop
        add     rDstPtr1, rDstPtr1, rDstSlop    ; step first row by dst_slop
        add     rDstPtr2, rDstPtr2, rDstSlop    ; step second row by dst_slop

        bne     @duo_start                      ; loop for all height

        blr                                     ; outta here

;-----------------------------------------------------------------------
; 16-bit pixels: source long P0:P1 becomes the double P0:P0:P1:P1.
;-----------------------------------------------------------------------
@quad16bit:
        lhz     rWidth, width(rParamPtr)
        subi    rWidth, rWidth, 1               ; first long is handled outside the loop

@quad16bit_start:
        lwzu    rTemp1, 4(rSrcPtr)              ; load first src long of the scanline
        mr      rTemp2, rTemp1                  ; save copy in rTemp2
        rlwimi  rTemp1, rTemp1, 16, 0, 15       ; P0:P1 -> P1:P1
        stw     rTemp1, -4(SP)                  ; store low half into redzone
        rlwimi  rTemp2, rTemp2, 16, 16, 31      ; P0:P1 -> P0:P0
        stw     rTemp2, -8(SP)                  ; store high half into redzone
        mtctr   rWidth
        lfd     fpTemp1, -8(SP)                 ; load double from redzone

@quad16bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)              ; load next src long
        stfdu   fpTemp1, 8(rDstPtr1)            ; previous double -> first row
        mr      rTemp2, rTemp1                  ; save copy in rTemp2
        rlwimi  rTemp1, rTemp1, 16, 0, 15
        rlwimi  rTemp2, rTemp2, 16, 16, 31
        stw     rTemp1, -4(SP)                  ; store low half into redzone
        stw     rTemp2, -8(SP)                  ; store high half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)            ; previous double -> second row
        lfd     fpTemp1, -8(SP)                 ; load double from redzone
        bdnz    @quad16bit_loop

        stfdu   fpTemp1, 8(rDstPtr1)            ; flush last double -> first row
        subic.  rHeight, rHeight, 1             ; we've done one scanline

        add     rSrcPtr, rSrcPtr, rSrcSlop      ; step source by src_slop
        add     rDstPtr1, rDstPtr1, rDstSlop    ; step first row by dst_slop

        stfdu   fpTemp1, 8(rDstPtr2)            ; flush last double -> second row
        add     rDstPtr2, rDstPtr2, rDstSlop    ; step second row by dst_slop
        bne     @quad16bit_start                ; loop for all height

        blr

;-----------------------------------------------------------------------
; 32-bit pixels: each source long is written twice to form the double.
;-----------------------------------------------------------------------
@quad32bit:
        lhz     rWidth, width(rParamPtr)
        subi    rWidth, rWidth, 1               ; first long is handled outside the loop

@quad32bit_start:
        lwzu    rTemp1, 4(rSrcPtr)              ; load first src long of the scanline
        stw     rTemp1, -4(SP)                  ; store low half into redzone
        stw     rTemp1, -8(SP)                  ; store high half into redzone
        mtctr   rWidth
        lfd     fpTemp1, -8(SP)                 ; load double from redzone

@quad32bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)              ; load next src long
        stfdu   fpTemp1, 8(rDstPtr1)            ; previous double -> first row
        stw     rTemp1, -4(SP)                  ; store low half into redzone
        stw     rTemp1, -8(SP)                  ; store high half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)            ; previous double -> second row
        lfd     fpTemp1, -8(SP)                 ; load double from redzone
        bdnz    @quad32bit_loop

        stfdu   fpTemp1, 8(rDstPtr1)            ; flush last double -> first row
        subic.  rHeight, rHeight, 1             ; we've done one scanline

        add     rSrcPtr, rSrcPtr, rSrcSlop      ; step source by src_slop
        add     rDstPtr1, rDstPtr1, rDstSlop    ; step first row by dst_slop

        stfdu   fpTemp1, 8(rDstPtr2)            ; flush last double -> second row
        add     rDstPtr2, rDstPtr2, rDstSlop    ; step second row by dst_slop
        bne     @quad32bit_start                ; loop for all height

        blr

; No branch in this routine targets @exit; it is kept as a spare
; return point.
@exit:
        blr
