; we win
; need to ask eric how to schedule the 16 and 24 bit quadruples

;-----------------------------------------------------------------------
; .quadruple_screen
;
; PowerPC (MPW PPCAsm syntax) leaf routine: contains no calls and saves
; no registers.
;
; In:    r3 = pointer to a parameter block laid out as below.
; Uses:  r3-r12, fp0, fp1, CTR, cr0, and 8 bytes of scratch at -8(SP).
;        NOTE(review): the scratch area below SP is the "red zone" per
;        the original comments - confirm it is safe for the target
;        runtime / interrupt environment.
;
; Parameter block (byte offsets):
;    0  source         long   source pointer
;    4  dest           long   destination pointer
;    8  width          half   32-bit source longs per row
;   10  height         half   source rows
;   12  bytes_per_row  half   added to dest to address the second
;                             destination row
;   14  src_slop       half   added to the source pointer after each row
;   16  dst_slop       half   added to both dest pointers after each row
;   18  flags          half   compared for EXACT equality, see dispatch
;
; Each source row is written to two destination rows (dest and
; dest + bytes_per_row).  In the "quad" paths every source long is
; expanded to 8 bytes by duplicating each 8/16/32-bit element; in the
; "duo" (vertical-only) path the row is copied unchanged, 16 bytes per
; iteration.
;
; Degenerate sizes (height == 0, width == 0, or fewer than 4 longs per
; row on the duo path) return without drawing.  On the duo path any
; remainder of width modulo 4 longs is not copied.
;-----------------------------------------------------------------------

        EXPORT  .quadruple_screen           ; export the code symbol

; Values of the flags field.  No dispatch case tests kEveryOtherLine;
; any flags value not matched below falls through to the 8-bit path.
kEveryOtherLine:    EQU     0x8000
k16BitMode:         EQU     0x4000
k32BitMode:         EQU     0x2000
kVerticalOnlyMode:  EQU     0x1000

; Parameter block offsets.
source:             EQU     0
dest:               EQU     4
width:              EQU     8
height:             EQU     10
bytes_per_row:      EQU     12
src_slop:           EQU     14
dst_slop:           EQU     16
flags:              EQU     18

; Register assignments.
rParamPtr:          EQU     r3
rSrcPtr:            EQU     r4
rDstPtr1:           EQU     r5
rDstPtr2:           EQU     r6
rHeight:            EQU     r7
rWidth:             EQU     r8
rDstSlop:           EQU     r10
rSrcSlop:           EQU     r11

rTemp1:             EQU     r9
rTemp2:             EQU     r12
rTemp3:             EQU     r3      ; HEY YOU, WE'RE RE-USING THIS! (same register as
                                    ; rParamPtr: read every parameter before writing it)

fpTemp1:            EQU     fp0
fpTemp2:            EQU     fp1

.quadruple_screen:
        lwz     rSrcPtr, source(rParamPtr)
        subi    rSrcPtr, rSrcPtr, 4         ; pre-bias for lwzu 4(...)
        lwz     rDstPtr1, dest(rParamPtr)
        subi    rDstPtr1, rDstPtr1, 8       ; pre-bias for stfdu 8(...)
        lhz     rDstPtr2, bytes_per_row(rParamPtr)
        add     rDstPtr2, rDstPtr2, rDstPtr1 ; second destination row, same bias
        lhz     rHeight, height(rParamPtr)
        lhz     rDstSlop, dst_slop(rParamPtr)
        lhz     rSrcSlop, src_slop(rParamPtr)

        cmpi    0, 0, rHeight, 0
        beq     @exit                       ; no rows: the row loops would never terminate

        ; Dispatch on the exact value of flags.
        lhz     rTemp1, flags(rParamPtr)
        cmpi    0, 0, rTemp1, k16BitMode
        beq     @quad16bit
        cmpi    0, 0, rTemp1, k16BitMode|kVerticalOnlyMode
        beq     @duo
        cmpi    0, 0, rTemp1, k32BitMode
        beq     @quad32bit
        cmpi    0, 0, rTemp1, k32BitMode|kVerticalOnlyMode ; was a second test of k16BitMode|kVerticalOnlyMode
        beq     @duo
        cmpi    0, 0, rTemp1, kVerticalOnlyMode
        beq     @duo
        ; anything else falls through to the 8-bit path

;-----------------------------------------------------------------------
; 8-bit elements: source bytes A B C D -> A A B B C C D D on two rows.
;-----------------------------------------------------------------------
@quad8bit:
        lhz     rWidth, width(rParamPtr)    ; last use of rParamPtr; r3 becomes rTemp3 below
        cmpi    0, 0, rWidth, 0
        beq     @exit                       ; zero-width rows: nothing to draw
        subi    rWidth, rWidth, 1           ; inner loop count; the first long is primed separately

@quad8bit_start:
        lwzu    rTemp1, 4(rSrcPtr)          ; load first source long of the row
        mr      rTemp2, rTemp1              ; rTemp2 will become A A B B
        mr      rTemp3, rTemp1              ; rTemp3 will become C C D D
        inslwi  rTemp2, rTemp1, 16, 8
        insrwi  rTemp3, rTemp1, 16, 8
        rlwimi  rTemp2, rTemp1, 16, 24, 31
        stw     rTemp2, -8(SP)              ; store high half into redzone
        rlwimi  rTemp3, rTemp1, 16, 0, 7
        stw     rTemp3, -4(SP)              ; store low half into redzone
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        cmpi    0, 0, rWidth, 0
        beq     @quad8bit_row_end           ; one-long row: CTR = 0 would make bdnz run 2^32 times
        mtctr   rWidth

@quad8bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)          ; load next source long into rTemp1
        stfdu   fpTemp1, 8(rDstPtr1)        ; write previous expansion to row 1
        mr      rTemp2, rTemp1
        mr      rTemp3, rTemp1
        inslwi  rTemp2, rTemp1, 16, 8
        insrwi  rTemp3, rTemp1, 16, 8
        rlwimi  rTemp2, rTemp1, 16, 24, 31
        stw     rTemp2, -8(SP)              ; store high half into redzone
        rlwimi  rTemp3, rTemp1, 16, 0, 7
        stw     rTemp3, -4(SP)              ; store low half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)        ; write previous expansion to row 2
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        bdnz    @quad8bit_loop

@quad8bit_row_end:
        stfdu   fpTemp1, 8(rDstPtr1)        ; flush last expansion to row 1
        subic.  rHeight, rHeight, 1         ; we've done one scanline
        add     rSrcPtr, rSrcPtr, rSrcSlop  ; add in "rowBytes"
        add     rDstPtr1, rDstPtr1, rDstSlop ; add in "rowBytes"
        stfdu   fpTemp1, 8(rDstPtr2)        ; flush last expansion to row 2
        add     rDstPtr2, rDstPtr2, rDstSlop ; add in "rowBytes"
        bne     @quad8bit_start             ; loop for all height
        blr                                 ; outta here

;-----------------------------------------------------------------------
; Vertical-only: copy each source row unchanged to two destination
; rows, 16 bytes (4 source longs) per iteration.
;-----------------------------------------------------------------------
@duo:
        subi    rSrcPtr, rSrcPtr, 4         ; total bias is now -8, for lfd 8(...) / lfdu 16(...)
        lhz     rWidth, width(rParamPtr)
        srwi    rWidth, rWidth, 2           ; we're doing 16 bytes at a time
        cmpi    0, 0, rWidth, 0
        beq     @exit                       ; fewer than 4 longs: CTR = 0 would make bdnz run 2^32 times

@duo_start:
        mtctr   rWidth

@duo_loop:
        lfd     fpTemp1, 8(rSrcPtr)
        lfdu    fpTemp2, 16(rSrcPtr)
        stfd    fpTemp1, 8(rDstPtr1)
        stfd    fpTemp1, 8(rDstPtr2)
        stfdu   fpTemp2, 16(rDstPtr1)
        stfdu   fpTemp2, 16(rDstPtr2)
        bdnz    @duo_loop

        subic.  rHeight, rHeight, 1
        add     rSrcPtr, rSrcPtr, rSrcSlop  ; add in "rowBytes"
        add     rDstPtr1, rDstPtr1, rDstSlop ; add in "rowBytes"
        add     rDstPtr2, rDstPtr2, rDstSlop ; add in "rowBytes"
        bne     @duo_start                  ; loop for all height
        blr                                 ; outta here

;-----------------------------------------------------------------------
; 16-bit elements: source halfwords A B -> A A B B on two rows.
;-----------------------------------------------------------------------
@quad16bit:
        lhz     rWidth, width(rParamPtr)
        cmpi    0, 0, rWidth, 0
        beq     @exit                       ; zero-width rows: nothing to draw
        subi    rWidth, rWidth, 1           ; inner loop count; the first long is primed separately

@quad16bit_start:
        lwzu    rTemp1, 4(rSrcPtr)          ; load first source long of the row
        mr      rTemp2, rTemp1              ; save copy in rTemp2
        rlwimi  rTemp1, rTemp1, 16, 0, 15   ; rTemp1 = B B
        stw     rTemp1, -4(SP)              ; store low half into redzone
        rlwimi  rTemp2, rTemp2, 16, 16, 31  ; rTemp2 = A A
        stw     rTemp2, -8(SP)              ; store high half into redzone
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        cmpi    0, 0, rWidth, 0
        beq     @quad16bit_row_end          ; one-long row: CTR = 0 would make bdnz run 2^32 times
        mtctr   rWidth

@quad16bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)          ; load next source long into rTemp1
        stfdu   fpTemp1, 8(rDstPtr1)        ; write previous expansion to row 1
        mr      rTemp2, rTemp1              ; put a copy in rTemp2
        rlwimi  rTemp1, rTemp1, 16, 0, 15
        rlwimi  rTemp2, rTemp2, 16, 16, 31
        stw     rTemp1, -4(SP)              ; store low half into redzone
        stw     rTemp2, -8(SP)              ; store high half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)        ; write previous expansion to row 2
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        bdnz    @quad16bit_loop

@quad16bit_row_end:
        stfdu   fpTemp1, 8(rDstPtr1)        ; flush last expansion to row 1
        subic.  rHeight, rHeight, 1         ; we've done one scanline
        add     rSrcPtr, rSrcPtr, rSrcSlop  ; add in "rowBytes"
        add     rDstPtr1, rDstPtr1, rDstSlop ; add in "rowBytes"
        stfdu   fpTemp1, 8(rDstPtr2)        ; flush last expansion to row 2
        add     rDstPtr2, rDstPtr2, rDstSlop ; add in "rowBytes"
        bne     @quad16bit_start            ; loop for all height
        blr

;-----------------------------------------------------------------------
; 32-bit elements: source long A -> A A on two rows.
;-----------------------------------------------------------------------
@quad32bit:
        lhz     rWidth, width(rParamPtr)
        cmpi    0, 0, rWidth, 0
        beq     @exit                       ; zero-width rows: nothing to draw
        subi    rWidth, rWidth, 1           ; inner loop count; the first long is primed separately

@quad32bit_start:
        lwzu    rTemp1, 4(rSrcPtr)          ; load first source long of the row
        stw     rTemp1, -4(SP)              ; store low half into redzone
        stw     rTemp1, -8(SP)              ; store high half into redzone
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        cmpi    0, 0, rWidth, 0
        beq     @quad32bit_row_end          ; one-long row: CTR = 0 would make bdnz run 2^32 times
        mtctr   rWidth

@quad32bit_loop:
        lwzu    rTemp1, 4(rSrcPtr)          ; load next source long into rTemp1
        stfdu   fpTemp1, 8(rDstPtr1)        ; write previous expansion to row 1
        stw     rTemp1, -4(SP)              ; store low half into redzone
        stw     rTemp1, -8(SP)              ; store high half into redzone
        stfdu   fpTemp1, 8(rDstPtr2)        ; write previous expansion to row 2
        lfd     fpTemp1, -8(SP)             ; load double from redzone
        bdnz    @quad32bit_loop

@quad32bit_row_end:
        stfdu   fpTemp1, 8(rDstPtr1)        ; flush last expansion to row 1
        subic.  rHeight, rHeight, 1         ; we've done one scanline
        add     rSrcPtr, rSrcPtr, rSrcSlop  ; add in "rowBytes"
        add     rDstPtr1, rDstPtr1, rDstSlop ; add in "rowBytes"
        stfdu   fpTemp1, 8(rDstPtr2)        ; flush last expansion to row 2
        add     rDstPtr2, rDstPtr2, rDstSlop ; add in "rowBytes"
        bne     @quad32bit_start            ; loop for all height
        blr

; Common early-out for degenerate sizes.
@exit:
        blr