PRINT OFF INCLUDE 'Traps.a' PRINT ON TITLE 'FHT10.a' STRING ASIS ; This file contains a 68000 assembly language implementation of the ; Fast Hartley Transform algorithm which is covered under United States ; Patent Number 4,646,256. ; ; This code may therefore be freely used and distributed only under the ; following conditions: ; ; 1) This header is included in every copy of the code; and ; ; 2) The code is used for noncommercial research purposes only. ; ; Firms using this code for commercial purposes will be infringing a United ; States patent and should contact the ; ; Office of Technology Licensing ; Stanford University ; 857 Serra Street, 2nd Floor ; Stanford, CA 94305-6225 ; (415) 723 0651 ; ; Questions about the implementation of this routine can be directed to me at: ; ; Until 4/15/90: Arlo Reeves ; Thayer School of Engineering ; Dartmouth College ; Hanover, NH 03755 ; (603) 643 9076 ; BITNET: arlo@mac.dartmouth.edu ; ; Permanently: Box 345 ; Mendocino, CA 95460 ; (707) 937 5686 ; FHT10 PROC EXPORT ; function FHT10(baseAddr: ptr; ; rowWords: integer; ; TwidTbl: ptr): ShiftTotal; (integer) ; ; This routine performs a simple decimation in time Radix 2 Fast Hartley Transform. ; baseAddr is a pointer to an array of words which is rowWords long. These ; integers must fall in the range [-8192..8191] because 2 bits of overflow can ; occur before any block floating point scaling is performed; the output is also ; limited to a 14 bit dynamic range. ; TwidTbl is a pointer to a twiddle factor lookup table with the following ; format: Longword Offset 1st word 2nd word ; k: (2^15)*cos(2šk/4096) (2^15)*sin(2šk/4096) ; where k ranges from 0 to 1023. The twiddle table is therefore 4K in size, so ; this routine can transform sequences and integer power of 2 in length from ; 4 to 4096. ; To process sequences up to 16K in length, a larger twiddle table must be created ; and the CSAdIncr (Cos/Sin address increment) variable must be increased ; from its current value of 2048 (doubled to 4096 for 8K sequence lengths). ; The routine, however, has not been tested for sequences longer than 4K. ; Bitreversal of the data must be done prior to calling this routine, e.g. with ; BitRevQ or BitRevEZ in one dimension, or with BitRevRows in two dimensions. ; The function returns the total number of bits shifted during the calculation. ; The correct scaling can be obtained by multiplying the transform by 2^ShiftTotal ; (though this may result in numbers bigger than 16 bits and does not add to the ; result's information content). ; Because of the Hartley Transform's symmetry, this routine can be used for ; both forward and inverse transformations. ; No checking is done for incorrect input; this is relegated to the calling routine. ; This routine will run on any 680x0 processor. ; ; The register map for this routine is as follows: ; D0 csAdOffset A0 x^ ; D1 numGps | gpNum A1 cs^ ; D2 gpSize | gpIndex A2 scratch ; D3 scratch A3 Ad1 | Ad2 ; D4 scratch A4 Ad3 | Ad4 ; D5 scratch A5 N | stage ; D6 scratch A6 FramePtr ; D7 scratch A7 StackPtr ; ; The other values used are ; A6 Offsets ShiftTot EQU 18 ; total bits shifted baseAddr EQU 14 ; Ptr to data AND N EQU 12 ; length of xform TwidTbl EQU 8 ; ptr to table of twiddle factors OldShift EQU -2 ; numBits to shift on this load this stage NewShift EQU -4 ; numBits to shift on this load next stage Ad2 EQU -6 ; Ad1 EQU -8 ; Ad1Ad2 EQU -8 ; starting stage address of 1st & 2nd data element Ad4 EQU -10 ; Ad3 EQU -12 ; Ad3Ad4 EQU -12 ; starting stage address of 3rd & 4th data element gpAdIncR EQU -14 ; gpAdIncF EQU -16 ; gpAdIncFR EQU -16 ; group Address Increment Forward & Retrograde indexing CSAdInc EQU -18 ; COS/SIN Address Increment UsedRegs REG A2-A5/D2-D7 start LINK A6, #-18 ; Lotsa Locals MOVEM.L UsedRegs, -(SP) ; Save registers MOVE.L baseAddr(A6), A4 ; A4: baseAddr MOVE.W N(A6), D0 ; move N into lo(D0) MOVE.W D0, D1 ; SWAP D0 ; MOVE.W D1, D0 ; MOVEA.L D0, A5 ; A5: N | N MOVE.W #0, ShiftTot(A6) ; ShiftTotal = 0 initially ; First 2 butterflys done in one loop Stage12 MOVE.W A5, D0 ; D0: N ASR.W #2, D0 ; D0: N div 4 SUBQ.W #1, D0 ; D0: N div 4 - 1 MOVEQ.L #0, D7 ; D7: NewShift | maxVal S12Loop MOVE.W D0, D1 ; calculate address offsets of data ASL.W #3, D1 ; #3 -> 2wice N for even addr. MOVE.L D1, D2 ; ADDQ.W #4, D2 ; D1: ?? | Ad1,2 SWAP D1 ; MOVE.W D2, D1 ; D1: Ad1,2 | Ad3,4 MOVE.L 0(A4,D1.W), D5 ; D5: c | d MOVE.L D5,D6 ; SWAP D6 ; D6: d | c SWAP D1 ; MOVE.L 0(A4,D1.W), D3 ; D3: a | b MOVE.L D3,D4 ; SWAP D4 ; D4: b | a ; D4 D3 D2 MOVE.W D3, D2 ; b a a b a b ADD.W D4, D3 ; b a a a+b a b SUB.W D2, D4 ; b a-b a a+b a b ; D6 D5 D2 MOVE.W D5, D2 ; d c c d c d ADD.W D6, D5 ; d c c c+d c d SUB.W D2, D6 ; d c-d c c+d c d MOVE.W D3, D2 ; c a+b ADD.W D5, D2 ; c a+b+c+d SWAP D2 ; a+b+c+d c MOVE.W D4, D2 ; a+b+c+d a-b ADD.W D6, D2 ; a+b+c+d a-b+c-d MOVE.L D2, 0(A4,D1.W) ; copy x[Ad1,2] back to memory SWAP D1 MOVE.W #0, D7 ; maxVal := 0 BTST.L #17, D7 ; already 2 bits OF detected? BNE.S GoOn1 ; No -> check for OF TST.W D2 ; BGE.S pos1 ; NEG.W D2 ; pos1 OR.W D2, D7 ; SWAP D2 ; TST.W D2 ; BGE.S pos2 ; NEG.W D2 ; pos2 OR.W D2, D7 ; set up maxVal GoOn1 MOVE.W D3, D2 ; a+b+c+d a+b SUB.W D5, D2 ; a+b+c+d a+b-c-d SWAP D2 ; a+b-c-d a+b+c+d MOVE.W D4, D2 ; a+b-c-d a-b SUB.W D6, D2 ; a+b-c-d a-b-c+d MOVE.L D2, 0(A4,D1.W) ; copy x[Ad3,4] back to memory BTST.L #17, D7 ; Already 2 bits of OF? BNE.S GoOn3 ; No -> check for OF TST.W D2 ; BGE.S pos3 ; NEG.W D2 ; pos3 OR.W D2, D7 ; SWAP D2 ; TST.W D2 ; BGE.S pos4 ; NEG.W D2 ; pos4 OR.W D2, D7 ; set up maxVal CMP.W #$1FFF, D7 ; now compare with mag limit BLE.S GoOn2 ; less than -> no OF BSET.L #16, D7 ; otherwise, shift next stage by 1 bit when loading GoOn2 CMP.W #$3FFF, D7 ; BLE.S GoOn3 ; BSET.L #17, D7 ; 2 bits of OF -> set Bit 17 GoOn3 DBRA.W D0, S12Loop ; end loop of first two stages SWAP D7 ; BTST.L #1, D7 ; 2 bits of OF? BEQ.S LEOneBitOF ; MOVE.W #2, D7 ; BRA.S Cont1 ; LEOneBitOF BTST.L #0, D7 ; BEQ.S NoOF ; MOVE.W #1, D7 ; BRA.S Cont1 ; NoOF MOVE.W #0, D7 ; Cont1 ADD.W D7, ShiftTot(A6) ; MOVE.W D7, OldShift(A6) ; for next stage MOVE.W A5, D7 ; D7 : N CMPI.W #8,D7 ; If transform length only 4 then end BLT exit ; MOVE.L TwidTbl(A6), A1 ; A1: cs^ MOVE.L A4, A0 ; A0: x^ ; set up the loop variables stage, gpNum and bfNum as well as gpSize numGps and numBfs MOVE.L A5, D0 ; MOVE.W #15, D1 ; logLoop BTST.L D1, D0 ; DBNE.W D1, logLoop ; MOVE.W D1, D0 ; MOVE.W D0, D3 ; copy SUBQ.W #3, D0 ; MOVE.L D0, A5 ; A5: N | stage ;; stage := Nlog2 - 3 MOVE.L A5, D1 ; SWAP D1 ASR.W #3, D1 ; numGps := maxN div 8; SWAP D1 ; D1: numGps | gpNum MOVE.L #$00020000, D2 ; D2: gpSize | gpIndex MOVE.L #$00000008, Ad1Ad2(A6) ; Set up Ad1 and Ad2 MOVE.L #$0004000C, Ad3Ad4(A6) ; Set up Ad3 and Ad4 MOVE.L #$000C0010,gpAdIncFR(A6); Set up Group Address Increments forward & retro MOVE.W #2048, CSAdInc(A6) ; Set up Cos/Sin Address Increment ; this address increment tailored to a TWID resource ; allowing 4K transform length - change accordingly Stage MOVE.W #0, NewShift(A6) ; no OF seen yet MOVE.L D1, D3 ; SWAP D3 ; SUBQ.W #1, D3 ; MOVE.W D3, D1 ; gpNum := numGps - 1 downto 0 MOVE.L Ad1Ad2(A6), A3 ; A3: Ad1 | Ad2 MOVE.L Ad3Ad4(A6), A4 ; A4: Ad3 | Ad4 ; fetch addresses Ad1 - Ad4 & calculate 1st butterfly Group MOVEQ.L #0, D7 ; MOVE.W OldShift(A6), D7 ; D7: maxVal | OldShift MOVE.L A3, D3 ; D3: Ad1 | Ad2 MOVE.W 0(A0, D3.W), D4 ; ASR.W D7, D4 ; shift if OF dictates SWAP D4 ; SWAP D3 ; MOVE.W 0(A0, D3.W), D4 ; D4: x[Ad2] | x[Ad1] ASR.W D7, D4 ; shift if OF dictates MOVE.L D4, D5 ; SWAP D5 ; MOVE.L D5, D6 ; ADD.W D4, D5 ; SUB.W D6, D4 ; MOVE.W D5, 0(A0, D3.W) ; x[Ad1] := x1 + x2 SWAP D3 ; MOVE.W D4, 0(A0, D3.W) ; x[Ad2] := x1 - x2 TST.W NewShift(A6) ; Over Flow already? BNE.S GoOn4 ; yes -> no need to check SWAP D7 ; now accumulate maxVal TST.W D4 ; BGE.S pos5 ; NEG.W D4 ; pos5 OR.W D4, D7 ; TST.W D5 ; BGE.S pos6 ; NEG.W D5 ; pos6 OR.W D5, D7 ; SWAP D7 ; GoOn4 MOVE.L A4, D3 ; MOVE.W 0(A0, D3.W), D4 ; ASR.W D7, D4 ; shift if OF dictates SWAP D4 ; SWAP D3 ; MOVE.W 0(A0, D3.W), D4 ; D4: x[Ad4] | x[Ad3] ASR.W D7, D4 ; shift if OF dictates MOVE.L D4, D5 ; SWAP D5 ; MOVE.L D5, D6 ; ADD.W D4, D5 ; SUB.W D6, D4 ; MOVE.W D5, 0(A0, D3.W) ; x[Ad3] := x3 + x4 SWAP D3 ; MOVE.W D4, 0(A0, D3.W) ; x[Ad4] := x3 - x4 TST.W NewShift(A6) ; Over Flow already? BNE.S GoOn5 ; yes -> no need to check SWAP D7 ; now accumulate maxVal TST.W D4 ; BGE.S pos7 ; NEG.W D4 ; pos7 OR.W D4, D7 ; TST.W D5 ; BGE.S pos8 ; NEG.W D5 ; pos8 OR.W D5, D7 ; CMP.W #$1FFF, D7 ; maxVal > #$1FFF => OverFlow BLT.S GoOn5 ; no OF -> go on MOVE #1, NewShift(A6) ; SWAP D7 ; D7: maxVal | oldShift GoOn5 MOVE.L D2, D3 ; SWAP D3 ; SUBQ.W #2, D3 ; MOVE.W D3, D2 ; gpIndex := gpSize - 2 downto 0 MOVE.L A3, D3 ; update Ad1 & Ad2 ADDQ.W #2, D3 ; Ad2 := Ad2 + 2 SWAP D3 ; ADDQ.W #2, D3 ; Ad1 := Ad1 + 2 SWAP D3 ; MOVEA.L D3, A3 ; MOVE.L A4, D3 ; update Ad3 & Ad4 MOVE.L D2, D4 ; SWAP D4 ; D4: gpIndex| gpSize ASL.W #1,D4 ; SWAP D3 ; ADD.W D4, D3 ; SUBQ.W #2, D3 ; SWAP D3 ; Ad4 := Ad4 + gpSize - 2 ADD.W D4, D3 ; SUBQ.W #2, D3 ; Ad3 := Ad3 + gpSize - 2 MOVEA.L D3, A4 ; MOVE.W CSAdInc(A6), D0 ; D0: CSAdOffset ; fetch addresses Ad1 - Ad4 & put CS[CSAd] into D7 bfLoop MOVE.W OldShift(A6), D7 ; MOVE.L A3, D3 ; D3: Ad1 | Ad2 MOVE.W 0(A0,D3.W), D5 ; D5: * | x[Ad2] ASR.W D7, D5 ; shift if OF dictates SWAP D3 ; MOVE.W 0(A0,D3.W), D6 ; D6: * | x[Ad1] ASR.W D7, D6 ; shift if OF dictates SWAP D6 ; D6: x[Ad1] | * MOVE.L A4, D3 ; D3: Ad3 | Ad4 MOVE.W 0(A0,D3.W), D4 ; D4: * | x[Ad4] ASR.W D7, D4 ; shift if OF dictates SWAP D3 ; MOVE.W 0(A0,D3.W), D6 ; D6: x[Ad1] | x[Ad3] ASR.W D7, D6 ; shift if OF dictates MOVEA.L D6, A2 ; save MOVE.L 0(A1,D0.W), D7 ; D7: cos | sin MOVE.L D7, D6 ; performing long subtraction avoids need CLR.W D6 ; to check for case of sin/cos = $8000 (N>=2048) SWAP D6 ; D6: 0 | cos MOVEQ.L #0, D3 ; MOVE.W D7, D3 ; D3: 0 | sin SUB.L D3, D6 ; D6: * | cos - sin (difference always < $8000) MULS.W D5, D6 ; D6: X2(c - s) MOVE.W D4, D3 ; D3: X4 ADD.W D5, D3 ; D3: X4 + X2 SUB.W D5, D4 ; D4: X4 - X2 CMPI.W #$8000, D7 ; is sin factor = 2^15? BEQ.S noMult1 ; long sequences (look at beg/end of TWID table) MULS.W D7, D3 ; D3: s(X4 + X2) BRA.S GoOn6 ; noMult1 SWAP D3 ; CLR.W D3 ; Equivalent to multiplying by 2^16 ASR.L #1, D3 ; D3: s(X4 + X2) GoOn6 SWAP D7 ; CMPI.W #$8000, D7 ; BEQ.S noMult2 ; MULS.W D7, D4 ; D4: c(X4 - X2) BRA.S GoOn7 ; noMult2 SWAP D4 ; CLR.W D4 ; Equivalent to multiplying by 2^16 ASR.L #1, D4 ; D4: c(X4 - X2) GoOn7 ADD.L D6, D3 ; D3: cX2 + sX4 ADD.L D6, D4 ; D4: cX4 - sX2 MOVE.W #15, D7 ; ASR.L D7, D3 ; Because mult'd by cos*2^15 BCC.S RoundDn1 ; ADDQ.W #1, D3 ; RoundDn1 ASR.L D7, D4 ; Because mult'd by cos*2^15 BCC.S RoundDn2 ; ADDQ.W #1, D4 ; RoundDn2 MOVE.L A2, D7 ; D7: x[Ad1] | x[Ad3] MOVE.W D7, D5 ; SUB.W D4, D5 ; D5: * | x3 ADD.W D4, D7 ; D7: * | x4 MOVE.L D7, D6 ; SWAP D6 ; MOVE.W D6, D4 ; SUB.W D3, D4 ; D4: * | x2 ADD.W D3, D6 ; D6: * | x1 MOVE.L A3, D3 MOVE.W D4, 0(A0,D3.W) ; store x2 ADDQ.W #2, D3 ; update Ad2 SWAP D3 MOVE.W D6, 0(A0,D3.W) ; store x1 ADDQ.W #2, D3 ; update Ad1 SWAP D3 ; MOVEA.L D3, A3 ; store updated Ad1 | Ad2 MOVE.L A4, D3 MOVE.W D7, 0(A0,D3.W) ; store x4 SUBQ.W #2, D3 ; update Ad4 SWAP D3 ; MOVE.W D5, 0(A0,D3.W) ; store x3 SUBQ.W #2, D3 ; update Ad3 SWAP D3 ; MOVEA.L D3, A4 ; store updated Ad3 | Ad4 TST.W NewShift(A6) ; BNE.S GoOn8 ; MOVE.W #0, D3 ; D4: maxVal TST.W D4 ; BGE.S pos9 ; NEG.W D4 ; pos9 OR.W D4, D3 ; TST.W D5 ; BGE.S pos10 ; NEG.W D5 ; pos10 OR.W D5, D3 ; TST.W D6 ; BGE.S pos11 ; NEG.W D6 ; pos11 OR.W D6, D3 ; TST.W D7 ; BGE.S pos12 ; NEG.W D7 ; pos12 OR.W D7, D3 ; CMP.W #$1FFF, D3 ; BLT.S GoOn8 ; MOVE.W #1, NewShift(A6) ; for next stage GoOn8 ADD.W CSAdInc(A6), D0 ; Update Cos/Sin Address DBRA D2, bfLoop ; gpIndex := gpIndex - 1, Loop if >= 0 MOVE.L gpAdIncFR(A6), D4 ; D4: gpAdIncF | gpAdIncR MOVE.L A4, D3 ; D3: Ad3 | Ad4 ADD.W D4, D3 ; gpAddress base := gpAddress base + adIncrement SWAP D3 ; ADD.W D4, D3 ; SWAP D3 ; MOVEA.L D3, A4 ; update Ad3 & Ad4 SWAP D4 MOVE.L A3, D3 ; ADD.W D4, D3 ; gpAddress base := gpAddress base + adIncrement SWAP D3 ; ADD.W D4, D3 ; SWAP D3 ; MOVEA.L D3, A3 ; update Ad1 & Ad2 DBRA D1, Group MOVE.L D1, D3 ; SWAP D3 ; ASR.W #1, D3 ; numGps := numGps div 2 SWAP D3 ; MOVE.L D3, D1 ; MOVE.L D2, D3 ; SWAP D3 ; ASL.W #1, D3 ; gpSize := gpSize * 2 SWAP D3 ; MOVE.L D3, D2 ; ASL.W Ad1(A6) ; update group Address bases ASL.W Ad2(A6) ; ASL.W Ad3(A6) ; ASL.W Ad4(A6) ; ASL.W gpAdIncF(A6) ; update group address increments ASL.W gpAdIncR(A6) ; ASR.W CSAdInc(A6) ; update Cos/Sin Address Increment MOVE.L A5, D3 ; MOVE.L D3, D4 ; SUBQ.W #1, D3 ; MOVEA.L D3, A5 ; stage := stage - 1 MOVE.W NewShift(A6), D7 ; ADD.W D7, ShiftTot(A6) ; Update ShiftTot MOVE.W NewShift(A6), OldShift(A6) ; Update OldShift DBRA.W D4, Stage ; TST.W OldShift(A6) ; was there to be a shift @ end? BEQ.S exit ; no -> exit MOVE.L A5, D0 ; SWAP D0 ; SUBQ.W #1, D0 ; D0: counter MOVE.W OldShift(A6), D1 ; D1: NumBits to shift CMPI.W #1, D1 ; if only one bit to shift, can do fast! BEQ.S OneBitLoop ; TwoBitLoop MOVE.W (A0), D2 ; Two bits to shift -> slower ASR.W D1, D2 ; MOVE.W D2, (A0)+ ; DBRA.W D0, TwoBitLoop ; BRA.S exit ; then exit OneBitLoop ASR.W (A0)+ ; only way to use this instr directly on mem DBRA.W D0, OneBitLoop ; exit MOVEM.L (SP)+,UsedRegs ; restore reg's UNLK A6 ; restore old A6 MOVEA.L (SP)+, A0 ; pop return address ADD.L #10, SP ; discard parameters JMP (A0) ; return to caller DC.B 'FHT10 ' ; MacsBug Name ENDPROC END