diff -urP linux/arch/arm/fastfpe/CPDO.S linuxa300/arch/arm/fastfpe/CPDO.S --- linux/arch/arm/fastfpe/CPDO.S 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/CPDO.S 2004-08-21 09:48:06.000000000 +0900 @@ -1,224 +1,442 @@ /* -Inside the emulator the FP numbers are kept with 32 bit accuracy for both -mantissa and exponent. The FP structure has 4 words reserved for each -register, the first is used just for the sign in bit 31, the second is the -mantissa (unsigned integer) and the third is the exponent (signed integer). - -The functions do actually only work properly for normalized values, and if -no overflow occurs. Hopfully most programs are not disturbed by this, and it -will probably be improved in future versions. +The FP structure has 4 words reserved for each register, the first is used just +for the sign in bit 31, the second and third are for the mantissa (unsigned +integer, high 32 bit first) and the fourth is the exponent (signed integer). +The mantissa is always normalized. + +If the exponent is 0x80000000, that is the most negative value, the number +represented is 0 and both mantissa words are also 0. + +If the exponent is 0x7fffffff, that is the biggest positive value, the number +represented is infinity if the high 32 mantissa bit are also 0, otherwise it is +a NaN. The low 32 mantissa bit are 0 if the number represented is infinity. + +Decimal and packed decimal numbers are not supported yet. The parameters to these functions are r0=destination pointer, r1 and r2 -source pointers. r4 is the instruction. They may use r0-r7. The return address -is in r14, except CPDO_rnf_core which expects the return address in r5 to -save memory accesses. +source pointers. r4 is the instruction. They may use r0-r8 and r14. They return +to fastfpe_next, except CPDO_rnf_core which expects the return address in r14. */ /*---------------------------------------------------------------------------*/ .globl CPDO_adf CPDO_adf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} + + cmp r7,#0x7fffffff + cmpne r8,#0x7fffffff + beq CPDO_adf_extra + cmp r1,r2 bne CPDO_suf_s CPDO_adf_s: - subs r2,r5,r6 - bge CPDO_adf_1 + subs r2,r7,r8 + bge CPDO_adf_2nd + mov r7,r8 rsb r2,r2,#0 - mov r5,r6 - adds r3,r4,r3,lsr r2 - b CPDO_adf_2 - -CPDO_adf_1: - adds r3,r3,r4,lsr r2 - -CPDO_adf_2: - addcs r5,r5,#1 - movcss r3,r3,rrx - beq CPDO_zero - stmia r0,{r1,r3,r5} + cmp r2,#32 + ble CPDO_adf_1st2 + + sub r2,r2,#32 + cmp r2,#32 + movgt r2,#32 + mov r5,r3,lsr r2 + mov r3,#0 + b CPDO_adf_add + +CPDO_adf_1st2: + rsb r8,r2,#32 + mov r5,r5,lsr r2 + orr r5,r5,r3,lsl r8 + mov r3,r3,lsr r2 @ 1. op normalized + b CPDO_adf_add + +CPDO_adf_2nd: + cmp r2,#32 + ble CPDO_adf_2nd2 + + sub r2,r2,#32 + cmp r2,#32 + movgt r2,#32 + mov r6,r4,lsr r2 + mov r4,#0 + b CPDO_adf_add + +CPDO_adf_2nd2: + rsb r8,r2,#32 + mov r6,r6,lsr r2 + orr r6,r6,r4,lsl r8 + mov r4,r4,lsr r2 @ 2. op normalized + +CPDO_adf_add: + adds r5,r5,r6 + adcs r3,r3,r4 @ do addition + bcc CPDO_adf_end + + add r7,r7,#1 + movs r3,r3,rrx + mov r5,r5,rrx @ correct for overflow + +CPDO_adf_end: + cmp r7,#0x20000000 + bge CPDO_inf + + stmia r0,{r1,r3,r5,r7} + b fastfpe_next + +CPDO_adf_extra: + cmp r7,#0x7fffffff @ was it the 1st ? + bne CPDO_infnan_2 @ no it was the 2nd + cmp r8,#0x7fffffff @ if 1st, 2nd too ? 
+ bne CPDO_infnan_1 @ no only 1st + cmp r3,#0 + cmpeq r4,#0 + bne CPDO_nan_12 + b CPDO_inf + +/*---------------------------------------------------------------------------*/ + +CPDO_infnan_1: + stmia r0,{r1,r3,r5,r7} + b fastfpe_next + +CPDO_infnan_2: + stmia r0,{r2,r4,r6,r8} + b fastfpe_next - mov pc,r14 +CPDO_nan_12: + orr r2,r3,r4 + b CPDO_inf_1 + +CPDO_nan: + mov r2,#0x40000000 @ create non signalling NaN + b CPDO_inf_1 + +CPDO_inf: + mov r2,#0 +CPDO_inf_1: + mov r3,#0 + mov r4,#0x7fffffff +CPDO_store_1234: + stmia r0,{r1,r2,r3,r4} + b fastfpe_next + +CPDO_zero: + mov r1,#0 +CPDO_zero_1: + mov r2,#0 + mov r3,#0 + mov r4,#0x80000000 + stmia r0,{r1,r2,r3,r4} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_suf CPDO_suf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} CPDO_suf_l: + cmp r7,#0x7fffffff + cmpne r8,#0x7fffffff + beq CPDO_suf_extra + cmp r1,r2 bne CPDO_adf_s -CPDO_suf_s: - subs r2,r5,r6 - bge CPDO_suf_1 - +CPDO_suf_s: + subs r2,r7,r8 @ determine greater number + bgt CPDO_suf_2nd @ first number is greater + blt CPDO_suf_1st @ second number is greater + cmp r3,r4 @ also mantissa is important + cmpeq r5,r6 + bhi CPDO_suf_2nd @ first number is greater + beq CPDO_zero + +CPDO_suf_1st: + eor r1,r1,#0x80000000 @ second number is greater, invert sign + mov r7,r8 rsb r2,r2,#0 - mov r5,r6 - rsbs r3,r4,r3,lsr r2 - b CPDO_suf_2 - -CPDO_suf_1: - subs r3,r3,r4,lsr r2 - -CPDO_suf_2: - beq CPDO_zero - - eorcc r1,r1,#0x80000000 - rsbcc r3,r3,#0 + cmp r2,#32 + ble CPDO_suf_1st2 + + sub r2,r2,#32 + cmp r2,#32 + movgt r2,#32 + mov r5,r3,lsr r2 + mov r3,#0 + b CPDO_suf_1st_sub + +CPDO_suf_1st2: + rsb r8,r2,#32 + mov r5,r5,lsr r2 + orr r5,r5,r3,lsl r8 + mov r3,r3,lsr r2 @ 1. op normalized + +CPDO_suf_1st_sub: + subs r5,r6,r5 @ do subtraction + sbc r3,r4,r3 + b CPDO_suf_norm + +CPDO_suf_2nd: + cmp r2,#32 + ble CPDO_suf_2nd2 + + sub r2,r2,#32 + cmp r2,#32 + movgt r2,#32 + mov r6,r4,lsr r2 + mov r4,#0 + b CPDO_suf_2nd_sub + +CPDO_suf_2nd2: + rsb r8,r2,#32 + mov r6,r6,lsr r2 + orr r6,r6,r4,lsl r8 + mov r4,r4,lsr r2 @ 2. op normalized + +CPDO_suf_2nd_sub: + subs r5,r5,r6 + sbc r3,r3,r4 @ do subtraction + +CPDO_suf_norm: + teq r3,#0 @ normalize 32bit + moveq r3,r5 + moveq r5,#0 + subeq r7,r7,#32 - cmp r3,#0x00010000 + cmp r3,#0x00010000 @ 16bit movcc r3,r3,lsl#16 - subcc r5,r5,#16 + orrcc r3,r3,r5,lsr#16 + movcc r5,r5,lsl#16 + subcc r7,r7,#16 - cmp r3,#0x01000000 + cmp r3,#0x01000000 @ 8bit movcc r3,r3,lsl#8 - subcc r5,r5,#8 + orrcc r3,r3,r5,lsr#24 + movcc r5,r5,lsl#8 + subcc r7,r7,#8 - cmp r3,#0x10000000 + cmp r3,#0x10000000 @ 4bit movcc r3,r3,lsl#4 - subcc r5,r5,#4 + orrcc r3,r3,r5,lsr#28 + movcc r5,r5,lsl#4 + subcc r7,r7,#4 - cmp r3,#0x40000000 + cmp r3,#0x40000000 @ 2bit movcc r3,r3,lsl#2 - subcc r5,r5,#2 + orrcc r3,r3,r5,lsr#30 + movcc r5,r5,lsl#2 + subcc r7,r7,#2 - cmp r3,#0x80000000 + cmp r3,#0x80000000 @ 1bit movcc r3,r3,lsl#1 - subcc r5,r5,#1 - - stmia r0,{r1,r3,r5} - - mov pc,r14 + orrcc r3,r3,r5,lsr#31 + movcc r5,r5,lsl#1 + subcc r7,r7,#1 + + cmp r7,#0xe0000000 + ble CPDO_zero_1 + + stmia r0,{r1,r3,r5,r7} + b fastfpe_next + +CPDO_suf_extra: + cmp r7,#0x7fffffff @ was it the 1st ? + eorne r2,r2,#0x80000000 @ change sign, might have been INF + bne CPDO_infnan_2 @ no it was the 2nd + cmp r8,#0x7fffffff @ if 1st, 2nd too ? + bne CPDO_infnan_1 @ no only 1st + cmp r3,#0 + cmpeq r4,#0 + bne CPDO_nan_12 + b CPDO_nan @ here is difference with adf ! 
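The CPDO_adf/CPDO_suf code above first screens out the special exponents (0x7fffffff for inf/NaN), then aligns the smaller operand's 64-bit mantissa by the exponent difference, adds or subtracts, and renormalizes. Purely as a reading aid, and not as part of the patch, the same-sign addition path can be sketched in C as below; the struct, the function name and the folding of the two 32-bit mantissa words into one uint64_t are illustrative simplifications of the register layout the assembly actually uses (sign in r1/r2, high mantissa word in r3/r4, low mantissa word in r5/r6, exponent in r7/r8).

    #include <stdint.h>

    /* hypothetical mirror of the 4-word register format of this patch */
    struct fastfpe_reg {
        uint32_t sign;   /* bit 31 only                                  */
        uint64_t mant;   /* both mantissa words; normalized: bit 63 set  */
        int32_t  exp;    /* 0x80000000 = zero, 0x7fffffff = inf/NaN      */
    };

    /* sketch of the same-sign path of CPDO_adf */
    static void adf_same_sign(struct fastfpe_reg *d,
                              const struct fastfpe_reg *a,
                              const struct fastfpe_reg *b)
    {
        uint64_t ma = a->mant, mb = b->mant;
        int32_t  exp  = a->exp;
        int32_t  diff = a->exp - b->exp;

        if (diff < 0) {                      /* let 'ma' be the bigger one  */
            uint64_t t = ma; ma = mb; mb = t;
            exp  = b->exp;
            diff = -diff;
        }
        mb = (diff < 64) ? mb >> diff : 0;   /* align the smaller mantissa  */

        uint64_t sum = ma + mb;
        if (sum < ma) {                      /* carry out of bit 63         */
            sum = (sum >> 1) | (1ULL << 63); /* shift the carry back in     */
            exp++;
        }

        d->sign = a->sign;                   /* both signs are equal here   */
        d->mant = sum;
        d->exp  = exp;                       /* CPDO_adf_end's overflow
                                                check (exp >= 0x20000000
                                                -> inf) is omitted here     */
    }

The assembly keeps the two mantissa words in separate registers, so it clamps shifts larger than 32 bits per word and propagates the carry with adds/adcs; the unequal-sign case falls through to the CPDO_suf subtraction and renormalization shown above.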
/*---------------------------------------------------------------------------*/ .globl CPDO_rsf CPDO_rsf: mov r3,r2 - ldmia r1,{r2,r4,r6} - ldmia r3,{r1,r3,r5} + ldmia r1,{r2,r4,r6,r8} + ldmia r3,{r1,r3,r5,r7} b CPDO_suf_l /*---------------------------------------------------------------------------*/ .globl CPDO_muf CPDO_muf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} + + cmp r7,#0x7fffffff + cmpne r8,#0x7fffffff + beq CPDO_muf_extra eor r1,r1,r2 - add r6,r5,r6 - umulls r2,r5,r4,r3 - beq CPDO_zero + adds r8,r7,r8 + bvs CPDO_zero_1 + + umull r7,r2,r3,r4 + umull r14,r3,r6,r3 + adds r7,r7,r3 @ r2|r7|r14 = r2|r7|#0 + #0|r3|r14 + adc r2,r2,#0 + umull r4,r3,r5,r4 + adds r14,r14,r4 @ r2|r7|r14 += #0|r3|r4 + adcs r7,r7,r3 + adc r2,r2,#0 + umull r4,r3,r5,r6 + adds r14,r14,r3 @ r2|r7|r14 += #0|#0|r3 + adcs r7,r7,#0 + adcs r2,r2,#0 + bpl CPDO_muf_norm - add r6,r6,#1 - stmia r0,{r1,r5,r6} - mov pc,r14 - -CPDO_muf_norm: - adds r2,r2,r2 - adcs r5,r5,r5 + add r8,r8,#1 + b CPDO_muf_end - stmia r0,{r1,r5,r6} - mov pc,r14 +CPDO_muf_norm: + adds r14,r14,r14 + adcs r7,r7,r7 + adcs r2,r2,r2 + +CPDO_muf_end: + cmp r8,#0x20000000 + bge CPDO_inf + cmp r8,#0xe0000000 + ble CPDO_zero_1 + stmia r0,{r1,r2,r7,r8} + b fastfpe_next + +CPDO_muf_extra: + cmp r7,#0x7fffffff @ was it the first? + bne CPDO_muf_extra_2nd @ no, so it was the second + cmp r8,#0x7fffffff @ yes, second too? + bne CPDO_muf_extra_1st @ no, only first + orr r3,r3,r4 @ if both inf -> inf, otherwise nan + eor r1,r1,r2 @ sign for the inf case + b CPDO_infnan_1 + +CPDO_muf_extra_1st: + cmp r3,#0 @ is it a nan? + bne CPDO_infnan_1 + cmp r8,#0x80000000 @ is the second 0? + beq CPDO_nan + eor r1,r1,r2 @ correct sign for inf + b CPDO_inf + +CPDO_muf_extra_2nd: + cmp r4,#0 @ is it a nan? + bne CPDO_infnan_2 + cmp r7,#0x80000000 @ is the first 0? + beq CPDO_nan + eor r1,r1,r2 @ correct sign for inf + b CPDO_inf /*---------------------------------------------------------------------------*/ -/* Divison ignores the LSB in both mantissa, but needs only ~110 cycles. 
*/ .globl CPDO_dvf CPDO_dvf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} CPDO_dvf_l: - eor r1,r1,r2 - sub r6,r5,r6 + cmp r7,#0x7fffffff + cmpne r8,#0x7fffffff + beq CPDO_dvf_extra + cmp r8,#0x80000000 + beq CPDO_dvf_by0 - movs r3,r3,lsr#1 - beq CPDO_zero - mov r4,r4,lsr#1 - rsb r4,r4,#0 - - .macro div_step - adcs r3,r4,r3,lsl#1 - subcc r3,r3,r4 - adc r5,r5,r5 - .endm - - adds r3,r4,r3 - adc r5,r5,r5 - bcs CPDO_dvf_b - - subcc r3,r3,r4 - add r3,r4,r3,lsl#1 - mov r5,#1 - sub r6,r6,#1 - -CPDO_dvf_b: - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step - div_step + eor r1,r1,r2 + cmp r7,#0x80000000 + beq CPDO_zero_1 -CPDO_dvf_e: - stmia r0,{r1,r5,r6} - mov pc,r14 + sub r8,r7,r8 -CPDO_zero: - mov r1,#0 mov r2,#0 - mov r3,#0x80000000 - stmia r0,{r1-r3} - mov pc,r14 + mov r7,#1 + + cmp r3,r4 + cmpeq r5,r6 + bcs CPDO_dvf_loop_ + + sub r8,r8,#1 + +CPDO_dvf_loop: + adds r5,r5,r5 + adcs r3,r3,r3 + bcs CPDO_dvf_anyway +CPDO_dvf_loop_: + subs r5,r5,r6 + sbcs r3,r3,r4 + bcs CPDO_dvf_okay + + adds r5,r5,r6 + adc r3,r3,r4 + adds r7,r7,r7 + adcs r2,r2,r2 + bcc CPDO_dvf_loop + b CPDO_dvf_end + +CPDO_dvf_anyway: + adcs r7,r7,r7 + adcs r2,r2,r2 + bcs CPDO_dvf_end + subs r5,r5,r6 + sbc r3,r3,r4 + b CPDO_dvf_loop + +CPDO_dvf_okay: + adcs r7,r7,r7 + adcs r2,r2,r2 + bcc CPDO_dvf_loop + +CPDO_dvf_end: + b CPDO_muf_end + +CPDO_dvf_by0: + cmp R7,#0x80000000 + beq CPDO_nan @ first also 0 -> nan + eor r1,r1,r2 @ otherwise calculatesign for inf + b CPDO_inf + +CPDO_dvf_extra: + cmp r7,#0x7fffffff @ was it the first? + bne CPDO_dvf_extra_2nd @ no, so it was the second + cmp r8,#0x7fffffff @ yes, second too? + bne CPDO_dvf_extra_1st @ no, only first + orrs r3,r3,r4 + beq CPDO_nan @ if both inf -> create nan + b CPDO_nan_12 @ otherwise keep nan + +CPDO_dvf_extra_1st: + eor r1,r1,r2 @ correct sign for inf + b CPDO_infnan_1 + +CPDO_dvf_extra_2nd: + cmp r4,#0 @ is it a nan? 
+ bne CPDO_infnan_2 + eor r1,r1,r2 @ correct sign for zero + b CPDO_zero_1 /*---------------------------------------------------------------------------*/ .globl CPDO_rdf CPDO_rdf: mov r3,r2 - ldmia r1,{r2,r4,r6} - ldmia r3,{r1,r3,r5} + ldmia r1,{r2,r4,r6,r8} + ldmia r3,{r1,r3,r5,r7} b CPDO_dvf_l /*---------------------------------------------------------------------------*/ .globl CPDO_rmf CPDO_rmf: - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ @@ -228,100 +446,139 @@ .globl CPDO_mvf CPDO_mvf: - ldmia r2,{r1,r2,r3} - stmia r0,{r1,r2,r3} - mov pc,r14 + ldmia r2,{r1,r2,r3,r4} + stmia r0,{r1,r2,r3,r4} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_mnf CPDO_mnf: - ldmia r2,{r1,r2,r3} + ldmia r2,{r1,r2,r3,r4} eor r1,r1,#0x80000000 - stmia r0,{r1,r2,r3} - mov pc,r14 + stmia r0,{r1,r2,r3,r4} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_abs CPDO_abs: - ldmia r2,{r1,r2,r3} + ldmia r2,{r1,r2,r3,r4} bic r1,r1,#0x80000000 - stmia r0,{r1,r2,r3} - mov pc,r14 + stmia r0,{r1,r2,r3,r4} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_sqt CPDO_sqt: - ldmia r2,{r1,r2,r4} - - and r5,r4,#1 - movs r2,r2,lsl r5 - - mov r3,#0x80000000 + ldmia r2,{r1,r2,r3,r4} + cmp r1,#0 + bne CPDO_nan + cmp r4,#0x7fffffff + beq CPDO_store_1234 + + tst r4,r4,lsr#1 @carry=exponent bit 0 + bcc CPDO_sqt_exponenteven + adds r3,r3,r3 + adcs r2,r2,r2 @carry is needed in loop! +CPDO_sqt_exponenteven: + mov r4,r4,asr #1 + str r4,[r0,#12] + + mov r4,#0x80000000 + mov r5,#0 sub r2,r2,#0x80000000 - .macro sqrt_step,N - add r5,r3,#(0x40000000>>\N) - cmpcc r2,r5 - addcs r3,r3,#(0x80000000>>\N) - subcs r2,r2,r5 - movs r2,r2,lsl#1 - .endm - - sqrt_step 1 - sqrt_step 2 - sqrt_step 3 - sqrt_step 4 - sqrt_step 5 - sqrt_step 6 - sqrt_step 7 - sqrt_step 8 - sqrt_step 9 - sqrt_step 10 - sqrt_step 11 - sqrt_step 12 - sqrt_step 13 - sqrt_step 14 - sqrt_step 15 - sqrt_step 16 - sqrt_step 17 - sqrt_step 18 - sqrt_step 19 - sqrt_step 20 - sqrt_step 21 - sqrt_step 22 - sqrt_step 23 - sqrt_step 24 - sqrt_step 25 - sqrt_step 26 - sqrt_step 27 - sqrt_step 28 - sqrt_step 29 - sqrt_step 30 - sqrt_step 31 - - mov r4,r4,asr#1 - stmia r0,{r1,r3,r4} - mov pc,r14 + mov r8,#0x40000000 + mov r14,#0x80000000 + + mov r1,#1 + b CPDO_sqt_loop1_first +CPDO_sqt_loop1: + adds r3,r3,r3 + adcs r2,r2,r2 +CPDO_sqt_loop1_first: + add r6,r4,r8,lsr r1 @r7 const = r5 + bcs CPDO_sqt_loop1_1 + cmp r2,r6 + cmpeq r3,r5 @r5 for r7 + bcc CPDO_sqt_loop1_0 +CPDO_sqt_loop1_1: + orr r4,r4,r14,lsr r1 + subs r3,r3,r5 @r5 for r7 + sbc r2,r2,r6 +CPDO_sqt_loop1_0: + add r1,r1,#1 + cmp r1,#30 + ble CPDO_sqt_loop1 + + adds r3,r3,r3 + adcs r2,r2,r2 + bcs CPDO_sqt_between_1 + adds r7,r5,#0x80000000 + adc r6,r4,#0 + cmp r2,r6 + cmpeq r3,r7 + bcc CPDO_sqt_between_0 +CPDO_sqt_between_1: + orr r4,r4,#0x00000001 + subs r3,r3,r5 + sbc r2,r2,r4 + subs r3,r3,#0x80000000 + sbc r2,r2,#0 +CPDO_sqt_between_0: + mov r1,#0 + +CPDO_sqt_loop2: + adds r3,r3,r3 + adcs r2,r2,r2 + bcs CPDO_sqt_loop2_1 + adds r7,r5,r8,lsr r1 + adc r6,r4,#0 + cmp r2,r6 + cmpeq r3,r7 + bcc CPDO_sqt_loop2_0 +CPDO_sqt_loop2_1: + orr r5,r5,r14,lsr r1 + subs r3,r3,r5 + sbc r2,r2,r4 + subs r3,r3,r8,lsr r1 + sbc r2,r2,#0 +CPDO_sqt_loop2_0: + add r1,r1,#1 + cmp r1,#30 + ble CPDO_sqt_loop2 + + adds r3,r3,r3 + adcs r2,r2,r2 + bcs CPDO_sqt_after_1 + cmp r2,r6 + cmpeq r3,r7 + bcc CPDO_sqt_after_0 
+CPDO_sqt_after_1: + orr r5,r5,#0x00000001 +CPDO_sqt_after_0: + + mov r1,#0 + stmia r0,{r1,r4,r5} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_rnd CPDO_rnd: - adr r5,CPDO_rnd_store - b CPDO_rnd_core + ldmia r2,{r1,r2,r3,r5} + bl CPDO_rnd_core CPDO_rnd_store: - stmia r0,{r1,r2,r3} - mov pc,r14 + stmia r0,{r1,r2,r3,r5} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDO_rnd_core CPDO_rnd_core: - ldmia r2,{r1,r2,r3} and r4,r4,#0x00000060 add pc,pc,r4,lsr#3 mov r0,r0 @@ -331,93 +588,95 @@ b CPDO_rnd_Z CPDO_rnd_N: - cmp r3,#-1 + cmp r5,#-1 blt CPDO_rnd_zero - cmp r3,#31 - bge CPDO_rnd_end - - rsb r4,r3,#30 - mov r2,r2,lsr r4 - add r2,r2,#1 - bic r2,r2,#1 - movs r2,r2,lsl r4 - addcs r3,r3,#1 - movcs r2,r2,rrx - - mov pc,r5 + cmp r5,#63 + movge pc,r14 + mov r4,#0x40000000 + cmp r5,#31 + bge CPDO_rnd_N_2 + + adds r2,r2,r4,lsr r5 + bcc CPDO_rnd_end + b CPDO_rnd_end_norm + +CPDO_rnd_N_2: +CPDO_rnd_P_2: + sub r6,r5,#32 + adds r3,r3,r4,ror r6 @ror ist needed to handle a -1 correctly + adcs r2,r2,#0 + bcc CPDO_rnd_end + b CPDO_rnd_end_norm CPDO_rnd_P: - cmp r3,#0 - blt CPDO_rnd_P_small - cmp r3,#31 - movge pc,r5 - tst r1,#0x80000000 - bne CPDO_rnd_end - - mov r4,#0x80000000 - sub r4,r4,#1 - adds r2,r2,r4,lsr r3 - addcs r3,r3,#1 - movcs r2,r2,rrx + bne CPDO_rnd_M_entry +CPDO_rnd_P_entry: + cmp r5,#0 + blt CPDO_rnd_P_small + cmp r5,#63 + movge pc,r14 + mov r4,#0x7fffffff + cmp r5,#32 + bge CPDO_rnd_P_2 + + adds r3,r3,#0xffffffff + adcs r2,r2,r4,lsr r5 + bcc CPDO_rnd_end + b CPDO_rnd_end_norm - b CPDO_rnd_end - CPDO_rnd_P_small: - cmp r2,#0 - beq CPDO_rnd_zero - tst r1,#0x80000000 - bne CPDO_rnd_zero + cmp r5,#0x80000000 + moveq pc,r14 b CPDO_rnd_one - - + CPDO_rnd_M: - cmp r3,#0 - blt CPDO_rnd_M_small - cmp r3,#31 - movge pc,r5 - tst r1,#0x80000000 - beq CPDO_rnd_end - - mov r4,#0x80000000 - sub r4,r4,#1 - adds r2,r2,r4,lsr r3 - addcs r3,r3,#1 - movcs r2,r2,rrx + bne CPDO_rnd_P_entry +CPDO_rnd_M_entry: + cmp r5,#0 + blt CPDO_rnd_zero + cmp r5,#63 + movge pc,r14 b CPDO_rnd_end -CPDO_rnd_M_small: - cmp r2,#0 - beq CPDO_rnd_zero - tst r1,#0x80000000 - beq CPDO_rnd_zero - b CPDO_rnd_one - CPDO_rnd_Z: - cmp r3,#0 + cmp r5,#0 blt CPDO_rnd_zero - cmp r3,#31 - movge pc,r5 - + cmp r5,#63 + movge pc,r14 b CPDO_rnd_end +CPDO_rnd_end_norm: + add r5,r5,#1 + movs r2,r2,rrx + mov r3,r3,rrx +CPDO_rnd_end: + rsbs r4,r5,#31 + bmi CPDO_rnd_end_2 + mov r3,#0 + mov r2,r2,lsr r4 + mov r2,r2,lsl r4 + mov pc,r14 + +CPDO_rnd_end_2: + rsb r4,r5,#63 + mov r3,r3,lsr r4 + mov r3,r3,lsl r4 + mov pc,r14 + CPDO_rnd_one: mov r2,#0x80000000 mov r3,#0 - mov pc,r5 + mov r5,#0 + mov pc,r14 CPDO_rnd_zero: mov r1,#0 mov r2,#0 - mov r3,#0x80000000 - mov pc,r5 + mov r3,#0 + mov r5,#0x80000000 + mov pc,r14 -CPDO_rnd_end: - rsb r4,r3,#31 - mov r2,r2,lsr r4 - mov r2,r2,lsl r4 - mov pc,r5 - /*---------------------------------------------------------------------------*/ diff -urP linux/arch/arm/fastfpe/CPDT.S linuxa300/arch/arm/fastfpe/CPDT.S --- linux/arch/arm/fastfpe/CPDT.S 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/CPDT.S 2004-08-21 09:48:06.000000000 +0900 @@ -1,14 +1,17 @@ /* -Inside the emulator the FP numbers are kept with 32 bit accuracy for both -mantissa and exponent. The FP structure has 4 words reserved for each -register, the first is used just for the sign in bit 31, the second is the -mantissa (unsigned integer) and the third is the exponent (signed integer). 
- -The functions do actually only work properly for normalized values, and if -no overflow occurs. Hopfully most programs are not disturbed by this, and it -will probably be improved in future versions. +The FP structure has 4 words reserved for each register, the first is used just +for the sign in bit 31, the second and third are for the mantissa (unsigned +integer, high 32 bit first) and the fourth is the exponent (signed integer). +The mantissa is always normalized. + +If the exponent is 0x80000000, that is the most negative value, the number +represented is 0 and both mantissa words are also 0. + +If the exponent is 0x7fffffff, that is the biggest positive value, the number +represented is infinity if the high 32 mantissa bit are also 0, otherwise it is +a NaN. The low 32 mantissa bit are 0 if the number represented is infinity. -Decimal and packed decimal numbers are not supported so yet. +Decimal and packed decimal numbers are not supported yet. */ /*---------------------------------------------------------------------------*/ @@ -17,213 +20,411 @@ CPDT_load_single: ldr r1,[r6] - bics r2,r1,#0x80000000 - beq CPDT_load_zero @ test for 0 + and r2,r1,#0x80000000 @ r2 = sign - mov r2,r1,lsl#8 - orr r2,r2,#0x80000000 @ insert leading 1 + mov r5,r1,lsr#23 + bics r5,r5,#0x100 + beq CPDT_ls_e0 @ exponent = 0; zero/denormalized + teq r5,#255 + beq CPDT_ls_e255 @ exponent = 255; infinity/NaN - mov r3,r1,lsr#23 - bic r3,r3,#0x100 - sub r3,r3,#127 @ subtract normalized bias + sub r5,r5,#127 @ r5 = exponent, remove normalized bias - and r1,r1,#0x80000000 @ only sign + mov r3,r1,lsl#8 + orr r3,r3,#0x80000000 + mov r4,#0 @ r3,r4 = mantissa - stmia r0,{r1-r3} - mov pc,r14 + stmia r0,{r2-r5} + b fastfpe_next + +CPDT_ls_e0: + movs r3,r1,lsl#9 + beq CPDT_load_zero + + mov r5,#-127 + +CPDT_ls_e0_norm: + tst r3,#0x80000000 + subeq r5,r5,#1 + moveq r3,r3,lsl#1 + beq CPDT_ls_e0_norm + + mov r4,#0 + stmia r0,{r2-r5} + b fastfpe_next + +CPDT_ls_e255: + mov r3,r1,lsl#9 + mov r4,#0 + mov r5,#0x7fffffff + stmia r0,{r2-r5} + b fastfpe_next CPDT_load_zero: - mov r1,#0 - mov r2,#0 - mov r3,#0x80000000 - stmia r0,{r1-r3} - mov pc,r14 + mov r3,#0 + mov r4,#0 + mov r5,#0x80000000 + stmia r0,{r2-r5} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_load_double CPDT_load_double: - ldr r2,[r6,#4] ldr r1,[r6] + ldr r6,[r6,#4] - bics r3,r1,#0x80000000 - cmpeq r2,#0 - beq CPDT_load_zero @ test for 0 - - mov r2,r2,lsr#21 - orr r2,r2,r1,lsl#11 - orr r2,r2,#0x80000000 @ insert leading 1 - - mov r3,r1,lsr#20 - bic r3,r3,#0x800 - sub r3,r3,#1024 - add r3,r3,#1 @ subtract normalized bias + and r2,r1,#0x80000000 @ r2 = sign - and r1,r1,#0x80000000 - - cmp r2,#0 + mov r5,r1,lsr#20 + bics r5,r5,#0x800 + beq CPDT_ld_e0 @ exponent = 0; zero/denormalized + add r4,r5,#1 + teq r4,#2048 + beq CPDT_ld_e2047 @ exponent = 2047; infinity/NaN + + add r5,r5,#1 + sub r5,r5,#1024 @ r5 = exponent, remove normalized bias + + mov r3,r1,lsl#11 + orr r3,r3,#0x80000000 + orr r3,r3,r6,lsr #21 + mov r4,r6,lsl#11 @ r3,r4 = mantissa + + stmia r0,{r2-r5} + b fastfpe_next + +CPDT_ld_e0: + mov r3,r1,lsl#12 + orr r3,r3,r6,lsr#20 + movs r4,r6,lsl#12 + teqeq r3,#0 beq CPDT_load_zero - stmia r0,{r1-r3} - mov pc,r14 + mov r5,#1 + sub r5,r5,#1024 + +CPDT_ld_e0_norm: + tst r3,#0x80000000 + subeq r5,r5,#1 + moveqs r4,r4,lsl#1 + adceq r3,r3,r3 + beq CPDT_ld_e0_norm + + stmia r0,{r2-r5} + b fastfpe_next + +CPDT_ld_e2047: + mov r3,r1,lsl#12 + orr r3,r3,r6,lsr#1 + bic r6,r6,#0x80000000 + orr r3,r3,r6 @ to get 
all fraction bits ! + mov r4,#0 + mov r5,#0x7fffffff + stmia r0,{r2-r5} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_load_extended CPDT_load_extended: - ldr r2,[r6,#4] ldr r1,[r6] + ldr r3,[r6,#4] + ldr r4,[r6,#8] - cmp r2,#0 - bics r3,r1,#0x80000000 - beq CPDT_load_zero @ test for 0 - - orr r2,r2,#0x80000000 @ insert leading 1 - - bic r3,r1,#0x80000000 - sub r3,r3,#16384 - add r3,r3,#1 @ subtract normalized bias - - and r1,r1,#0x80000000 + and r2,r1,#0x80000000 + bics r5,r1,#0x80000000 + beq CPDT_le_e0 + add r1,r5,#1 + teq r4,#32768 + beq CPDT_le_e32767 + + add r5,r5,#1 + sub r5,r5,#16384 + + stmia r0,{r2-r5} + b fastfpe_next + +CPDT_le_e0: + teq r3,#0 + teqeq r4,#0 + beq CPDT_load_zero - cmp r2,#0 - beq CPDT_load_zero - - stmia r0,{r1-r3} - mov pc,r14 + mov r5,#2 + sub r5,r5,#16384 + b CPDT_ld_e0_norm + +CPDT_le_e32767: + mov r3,r3,lsl#1 + orr r3,r3,r4,lsr#1 + bic r4,r4,#0x80000000 + orr r3,r3,r4 + mov r5,#0x7fffffff + stmia r0,{r2-r5} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_load_decimal CPDT_load_decimal: - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_store_single CPDT_store_single: - ldmia r0,{r1-r3} - - cmp r2,#0 - beq CPDT_store_single_zero - - adds r3,r3,#127 - ble CPDT_store_single_zero + ldmia r0,{r1-r4} - bic r3,r3,#0x100 - orr r1,r1,r3,lsl#23 + cmp r4,#-127 + ble CPDT_ss_e0 + cmp r4,#128 + bge CPDT_ss_e255 + + adds r2,r2,#1<<7 @ round to nearest + bcs CPDT_ss_rnd_ovfl @ very very seldom taken + +CPDT_ss_store: + add r4,r4,#127 + orr r1,r1,r4,lsl#23 bic r2,r2,#0x80000000 orr r1,r1,r2,lsr#8 str r1,[r6] - mov pc,r14 + b fastfpe_next + +CPDT_ss_rnd_ovfl: + add r4,r4,#1 + cmp r4,#128 + bge CPDT_ss_e255 + + mov r2,#0x80000000 + mov r3,#0 + b CPDT_ss_store -CPDT_store_single_zero: - mov r1,#0 +CPDT_ss_e0: + cmp r4,#-150 + ble CPDT_ss_zero + + add r4,r4,#126 +CPDT_ss_unnormalize: + mov r2,r2,lsr#1 + adds r4,r4,#1 + bne CPDT_ss_unnormalize + + orr r1,r1,r2,lsr#8 + +CPDT_ss_zero: str r1,[r6] - mov pc,r14 + b fastfpe_next + +CPDT_ss_e255: + cmp r4,#0x7fffffff + bne CPDT_ss_inf + cmp r2,#0 + beq CPDT_ss_inf + + orr r1,r1,#0x00200000 @ for safety so that it is not INF + orr r1,r1,r2,lsr#9 @ get highest bit of mantissa + +CPDT_ss_inf: + orr r1,r1,#0x7f000000 + orr r1,r1,#0x00800000 + str r1,[r6] + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_store_double CPDT_store_double: - ldmia r0,{r1-r3} - - cmp r2,#0 - beq CPDT_store_double_zero - - adds r3,r3,#1024 - ble CPDT_store_double_zero + ldmia r0,{r1-r4} - sub r3,r3,#1 - bic r3,r3,#0x800 - orr r1,r1,r3,lsl#20 + cmp r4,#1024 @ this check has to be first, or + bge CPDT_sd_e2047 @ overflow can occur on second ! 
+ add r0,r4,#3 + cmp r0,#-1023+3 @ cmp with -1023 + ble CPDT_sd_e0 + + adds r3,r3,#1<<10 @ round to nearest + adcs r2,r2,#0 + bcs CPDT_sd_rnd_ovfl @ very very seldom taken + +CPDT_sd_store: + sub r4,r4,#1 + add r4,r4,#1024 + orr r1,r1,r4,lsl#20 bic r2,r2,#0x80000000 orr r1,r1,r2,lsr#11 mov r2,r2,lsl#21 + orr r2,r2,r3,lsr#11 + + stmia r6,{r1,r2} + b fastfpe_next + +CPDT_sd_rnd_ovfl: + add r4,r4,#1 + cmp r4,#1024 + bge CPDT_sd_e2047 + + mov r2,#0x80000000 + mov r3,#0 + b CPDT_sd_store + +CPDT_sd_e0: + add r0,r4,#1075-1024 + cmp r0,#-1024 + ble CPDT_sd_zero + + add r4,r4,#1024 + sub r4,r4,#2 +CPDT_sd_unnormalize: + movs r2,r2,lsr#1 + mov r3,r3,rrx + adds r4,r4,#1 + bne CPDT_sd_unnormalize + + orr r1,r1,r2,lsr#11 + mov r2,r2,lsl#21 + orr r2,r2,r3,lsr#11 - stmia r6,{r1-r2} - mov pc,r14 + stmia r6,{r1,r2} + b fastfpe_next -CPDT_store_double_zero: - mov r1,#0 +CPDT_sd_zero: mov r2,#0 - stmia r6,{r1-r2} - mov pc,r14 + stmia r6,{r1,r2} + b fastfpe_next + +CPDT_sd_e2047: + cmp r4,#0x7fffffff + bne CPDT_sd_inf + cmp r2,#0 + beq CPDT_sd_inf + + orr r1,r1,#0x00040000 @ for safety so that it is not INF + orr r1,r1,r2,lsr#12 @ get highest bit of mantissa + +CPDT_sd_inf: + orr r1,r1,#0x7f000000 + orr r1,r1,#0x00f00000 + stmia r6,{r1,r2} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_store_extended CPDT_store_extended: - ldmia r0,{r1-r3} + ldmia r0,{r1-r4} - cmp r2,#0 - beq CPDT_store_extended_zero - - adds r3,r3,#16384 - ble CPDT_store_extended_zero + cmp r4,#16384 @ this check has to be first, or + bge CPDT_se_e32767 @ overflow can occur with second ! + add r0,r4,#63 + cmp r0,#-16383+63 + ble CPDT_se_e0 + + sub r4,r4,#1 + add r4,r4,#16384 + orr r1,r1,r4 - sub r3,r3,#1 - mov r3,r3,lsl#17 - orr r1,r1,r3,lsr#17 + stmia r6,{r1-r3} + b fastfpe_next - mov r3,#0 +CPDT_se_e0: + add r0,r4,#16446-16384 + cmp r0,#-16384 + ble CPDT_se_zero + + add r4,r4,#16384 + sub r4,r4,#2 +CPDT_se_unnormalize: + movs r2,r2,lsr#1 + mov r3,r3,rrx + adds r4,r4,#1 + bne CPDT_se_unnormalize stmia r6,{r1-r3} - mov pc,r14 + b fastfpe_next -CPDT_store_extended_zero: - mov r1,#0 +CPDT_se_zero: mov r2,#0 mov r3,#0 stmia r6,{r1-r3} - mov pc,r14 + b fastfpe_next + +CPDT_se_e32767: + cmp r4,#0x7fffffff + bne CPDT_se_inf + cmp r2,#0 + beq CPDT_se_inf + + mov r2,r2,lsl#1 + orr r2,r2,#0x20000000 + +CPDT_se_inf: + orr r1,r1,#0x00007f00 + orr r1,r1,#0x000000ff + stmia r6,{r1-r3} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_store_decimal CPDT_store_decimal: - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_sfm CPDT_sfm: add r2,r10,r0,lsr#8 - ldr r3,[r2],#4 + ldr r4,[r2,#0] + ldr r3,[r2,#4] + bic r3,r3,#0x80000000 + orr r3,r3,r4 str r3,[r6],#4 - ldr r3,[r2],#4 + ldr r3,[r2,#8] str r3,[r6],#4 - ldr r3,[r2],#4 + ldr r3,[r2,#12] str r3,[r6],#4 add r0,r0,#1<<12 and r0,r0,#7<<12 subs r1,r1,#1 bne CPDT_sfm - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPDT_lfm CPDT_lfm: add r2,r10,r0,lsr#8 + ldr r4,[r6],#4 + and r3,r4,#0x80000000 + str r3,[r2,#0] ldr r3,[r6],#4 - str r3,[r2],#4 + str r3,[r2,#8] ldr r3,[r6],#4 - str r3,[r2],#4 - ldr r3,[r6],#4 - str r3,[r2],#4 + str r3,[r2,#12] + + cmp r3,#0x80000000 @ does the exp indicate zero? + biceq r4,r4,#0x80000000 @ if so, indicate 'denormalized' + beq CPDT_lfm_storer4 + cmp r3,#0x7fffffff @ does the exp indicate inf or NaN? 
+ biceq r4,r4,#0x80000000 @ if so, indicate 'denormalized' + beq CPDT_lfm_storer4 + orrne r4,r4,#0x80000000 @ otherwise, set normalized bit + +CPDT_lfm_storer4: + str r4,[r2,#4] add r0,r0,#1<<12 and r0,r0,#7<<12 subs r1,r1,#1 bne CPDT_lfm - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ diff -urP linux/arch/arm/fastfpe/CPRT.S linuxa300/arch/arm/fastfpe/CPRT.S --- linux/arch/arm/fastfpe/CPRT.S 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/CPRT.S 2004-08-21 09:48:06.000000000 +0900 @@ -1,86 +1,105 @@ +/* +The FP structure has 4 words reserved for each register, the first is used +just +for the sign in bit 31, the second and third are for the mantissa (unsigned +integer, high 32 bit first) and the fourth is the exponent (signed integer). +The mantissa is always normalized. + +If the exponent is 0x80000000, that is the most negative value, the number +represented is 0 and both mantissa words are also 0. + +If the exponent is 0x7fffffff, that is the biggest positive value, the +number +represented is infinity if the high 32 mantissa bit are also 0, otherwise it +is +a NaN. The low 32 mantissa bit are 0 if the number represented is infinity. + +Decimal and packed decimal numbers are not supported yet. +*/ + /*---------------------------------------------------------------------------*/ .text .globl CPRT_flt CPRT_flt: add r0,r13,r0,lsr#10 - ldr r3,[r0] - cmp r3,#0 - beq CPRT_zero + ldr r2,[r0] + mov r3,#0 + cmp r2,#0 + beq CPRT_flt_zero - ands r2,r3,#0x80000000 - rsbne r3,r3,#0 + ands r0,r2,#0x80000000 + rsbne r2,r2,#0 mov r4,#31 - cmp r3,#0x00010000 - movcc r3,r3,lsl#16 + cmp r2,#0x00010000 + movcc r2,r2,lsl#16 subcc r4,r4,#16 - cmp r3,#0x01000000 - movcc r3,r3,lsl#8 + cmp r2,#0x01000000 + movcc r2,r2,lsl#8 subcc r4,r4,#8 - cmp r3,#0x10000000 - movcc r3,r3,lsl#4 + cmp r2,#0x10000000 + movcc r2,r2,lsl#4 subcc r4,r4,#4 - cmp r3,#0x40000000 - movcc r3,r3,lsl#2 + cmp r2,#0x40000000 + movcc r2,r2,lsl#2 subcc r4,r4,#2 - cmp r3,#0x80000000 - movcc r3,r3,lsl#1 + cmp r2,#0x80000000 + movcc r2,r2,lsl#1 subcc r4,r4,#1 - stmia r1,{r2,r3,r4} - mov pc,r14 + stmia r1,{r0,r2,r3,r4} + b fastfpe_next -CPRT_zero: - mov r2,#0 - mov r3,#0 +CPRT_flt_zero: + mov r0,#0 mov r4,#0x80000000 - stmia r1,{r2,r3,r4} - mov pc,r14 + stmia r1,{r0,r2,r3,r4} + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPRT_fix CPRT_fix: - adr r5,CPRT_back - b CPDO_rnd_core + ldmia r2,{r1,r2,r3,r5} + bl CPDO_rnd_core CPRT_back: add r0,r13,r0,lsr#10 - cmp r3,#0 + cmp r5,#0 blt CPRT_int_zero - cmp r3,#30 + cmp r5,#30 bgt CPRT_overflow - rsb r3,r3,#31 - mov r2,r2,lsr r3 + rsb r5,r5,#31 + mov r2,r2,lsr r5 tst r1,#0x80000000 rsbne r2,r2,#0 str r2,[r0] - mov pc,r14 + b fastfpe_next CPRT_int_zero: mov r2,#0 str r2,[r0] - mov pc,r14 + b fastfpe_next CPRT_overflow: mov r2,#0x80000000 tst r1,#0x80000000 subeq r2,r2,#1 str r2,[r0] - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPRT_wfs CPRT_wfs: - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ @@ -89,60 +108,77 @@ add r0,r13,r0,lsr#10 mov r1,#0x02000000 @ Software Emulation, not Acorn FPE str r1,[r0] - mov pc,r14 + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPRT_cmf CPRT_cmf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} CPRT_cmf_e: ldr 
r0,[r13,#16*4] - bic r0,r0,#0xf0000000 + cmp r7,#0x7fffffff + bic r0,r0,#0xf0000000 + + cmpeq r3,#0xffffffff + beq CPRT_cmf_unordered + cmp r8,#0x7fffffff + cmpeq r4,#0xffffffff + beq CPRT_cmf_unordered + cmp r1,r2 beq CPRT_cmf_equalsign b CPRT_cmf_sign CPRT_cmf_equalsign: - cmp r5,r6 + cmp r7,r8 beq CPRT_cmf_equalexponent bgt CPRT_cmf_sign b CPRT_cmf_signb CPRT_cmf_equalexponent: cmp r3,r4 + cmpeq r5,r6 beq CPRT_cmf_equal - bgt CPRT_cmf_sign + bhi CPRT_cmf_sign b CPRT_cmf_signb CPRT_cmf_sign: + cmp r7,#0x80000000 @ (0.0 == -0.0)? + cmpeq r7,r8 + beq CPRT_cmf_equal tst r1,#0x80000000 orreq r0,r0,#0x20000000 orrne r0,r0,#0x80000000 str r0,[r13,#16*4] - mov pc,r14 + b fastfpe_next CPRT_cmf_signb: tst r1,#0x80000000 orrne r0,r0,#0x20000000 orreq r0,r0,#0x80000000 str r0,[r13,#16*4] - mov pc,r14 + b fastfpe_next CPRT_cmf_equal: orr r0,r0,#0x60000000 str r0,[r13,#16*4] - mov pc,r14 + b fastfpe_next + +CPRT_cmf_unordered: + orr r0,r0,#0x10000000 + str r0,[r13,#16*4] + b fastfpe_next /*---------------------------------------------------------------------------*/ .globl CPRT_cnf CPRT_cnf: - ldmia r1,{r1,r3,r5} - ldmia r2,{r2,r4,r6} + ldmia r1,{r1,r3,r5,r7} + ldmia r2,{r2,r4,r6,r8} eor r2,r2,#0x80000000 b CPRT_cmf_e diff -urP linux/arch/arm/fastfpe/Makefile linuxa300/arch/arm/fastfpe/Makefile --- linux/arch/arm/fastfpe/Makefile 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/Makefile 2004-08-21 09:48:06.000000000 +0900 @@ -17,6 +17,8 @@ obj-$(CONFIG_FPE_FASTFPE) += fastfpe.o +USE_STANDARD_AS_RULE := true + include $(TOPDIR)/Rules.make fastfpe.o: $(fastfpe-objs) linux/arch/arm/fastfpeだけに発見: config.h diff -urP linux/arch/arm/fastfpe/entry.S linuxa300/arch/arm/fastfpe/entry.S --- linux/arch/arm/fastfpe/entry.S 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/entry.S 2004-08-21 09:48:06.000000000 +0900 @@ -13,14 +13,14 @@ .data fp_const: - .word 0, 0x00000000, 0x80000000, 0 @ 0 - .word 0, 0x80000000, 0, 0 @ 1 - .word 0, 0x80000000, 1, 0 @ 2 - .word 0, 0xc0000000, 1, 0 @ 3 - .word 0, 0x80000000, 2, 0 @ 4 - .word 0, 0xa0000000, 2, 0 @ 5 - .word 0, 0x80000000, -1, 0 @ 0.5 - .word 0, 0xa0000000, 3, 0 @ 10 + .word 0, 0x00000000, 0, 0x80000000 @ 0 + .word 0, 0x80000000, 0, 0 @ 1 + .word 0, 0x80000000, 0, 1 @ 2 + .word 0, 0xc0000000, 0, 1 @ 3 + .word 0, 0x80000000, 0, 2 @ 4 + .word 0, 0xa0000000, 0, 2 @ 5 + .word 0, 0x80000000, 0, -1 @ 0.5 + .word 0, 0xa0000000, 0, 3 @ 10 fp_undef: .word 0 fp_cond: @@ -76,7 +76,8 @@ /*---------------------------------------------------------------------------*/ -finish: + .globl fastfpe_next +fastfpe_next: ldr r5,[r13,#60] next_after_cond: __x1: @@ -123,6 +124,9 @@ subeq r7,r6,r7,lsl#2 @ r6=base address +/- offset tst r4,#0x01000000 @ preindexing ? movne r6,r7 + tst r4,#0x00200000 @ write back ? + cmpne r5,#0x000f0000 @ base register = pc ? + strne r7,[r13,r5,lsr#14] and r0,r4,#0x00007000 @ r0=fp register number << 12 add r0,r10,r0,lsr#8 @ r0=address of fp register @@ -134,26 +138,17 @@ tst r4,#0x00100000 orrne r1,r1,#4 @ L/S - adr r14,CPDT_1_writeback @ for being able to "call" something add pc,pc,r1,lsl#2 mov r0,r0 b CPDT_store_single @ these functions get b CPDT_store_double @ r0=address of fp register b CPDT_store_extended @ r6=address of data - b undefined @ CPDT_store_decimal @ and may modify r0-r3 + b undefined @ CPDT_store_decimal b CPDT_load_single b CPDT_load_double b CPDT_load_extended b undefined @ CPDT_load_decimal -CPDT_1_writeback: - tst r4,#0x00200000 @ write back ? - cmpne r5,#0x000f0000 @ base register = pc ? 
- beq finish - - str r7,[r13,r5,lsr#14] - b finish - /*---------------------------------------------------------------------------*/ CPDT_M_enter: @@ -168,6 +163,9 @@ subeq r7,r6,r7,lsl#2 @ r7=base address +/- offset tst r4,#0x01000000 @ preindexing ? movne r6,r7 + tst r4,#0x00200000 @ write back ? + cmpne r5,#0x000f0000 @ base register = pc ? + strne r7,[r13,r5,lsr#14] and r0,r4,#0x00007000 @ r0=fp register number << 12 and r1,r4,#0x00008000 @@ -176,19 +174,10 @@ orrs r1,r1,r2,lsr#21 @ N1 addeq r1,r1,#4 @ r1=register count - adr r14,CPDT_M_writeback @ for being able to "call" something tst r4,#0x00100000 @ load/store beq CPDT_sfm b CPDT_lfm -CPDT_M_writeback: - tst r4,#0x00200000 @ write back ? - cmpne r5,#0x000f0000 @ base register = pc ? - beq finish - - str r7,[r13,r5,lsr#14] - b finish - /*---------------------------------------------------------------------------*/ CPDO_CPRT_enter: @@ -209,7 +198,6 @@ tst r4,#0x00008000 orrne r3,r3,#0x01000000 - adr r14,finish @ call return address add pc,pc,r3,lsr#18 mov r0,r0 b CPDO_adf @@ -243,7 +231,7 @@ b undefined b undefined b CPDO_rnd - b finish + b fastfpe_next CPDO_const: ldr r2,=fp_const @@ -265,7 +253,6 @@ CPRT_constback: and r3,r4,#0x00f00000 - adr r14,finish @ call return address add pc,pc,r3,lsr#18 mov r0,r0 b CPRT_flt @@ -306,4 +293,3 @@ .previous /*---------------------------------------------------------------------------*/ - diff -urP linux/arch/arm/fastfpe/module.c linuxa300/arch/arm/fastfpe/module.c --- linux/arch/arm/fastfpe/module.c 2002-05-15 21:37:41.000000000 +0900 +++ linuxa300/arch/arm/fastfpe/module.c 2004-08-21 09:48:06.000000000 +0900 @@ -1,6 +1,6 @@ /* Fast Floating Point Emulator - (c) Peter Teichmann + (c) Peter Teichmann This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ return 0; #endif - printk("Fast Floating Point Emulator V0.0 (c) Peter Teichmann.\n"); + printk("Fast Floating Point Emulator V0.9 (c) Peter Teichmann.\n"); /* Save pointer to the old FP handler and then patch ourselves in */ orig_fp_enter = kern_fp_enter; @@ -74,5 +74,5 @@ module_init(fpe_init); module_exit(fpe_exit); -MODULE_AUTHOR("Peter Teichmann "); -MODULE_DESCRIPTION("Fast floating point emulator"); +MODULE_AUTHOR("Peter Teichmann "); +MODULE_DESCRIPTION("Fast floating point emulator with full precision");
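As a further reading aid for the CPDT.S hunk, the way CPDT_load_single unpacks an IEEE-754 single into the four-word internal format (implicit leading one restored at bit 31 of the high mantissa word, bias 127 removed, denormals normalized, exponents 0x80000000/0x7fffffff reserved for zero and inf/NaN) can be read as the following rough C equivalent. It is only an illustration, not part of the patch; the struct and function names are invented, and the routine itself works directly on registers r2-r5.

    #include <stdint.h>

    struct fastfpe_reg {         /* hypothetical mirror of the 4-word layout */
        uint32_t sign;           /* bit 31 only                              */
        uint32_t mant_hi;        /* high mantissa word, leading 1 at bit 31  */
        uint32_t mant_lo;        /* low mantissa word                        */
        int32_t  exp;            /* signed exponent                          */
    };

    /* rough C equivalent of CPDT_load_single */
    static void load_single(struct fastfpe_reg *d, uint32_t w)
    {
        uint32_t e    = (w >> 23) & 0xff;   /* biased exponent field    */
        uint32_t frac = w & 0x007fffff;     /* 23 fraction bits         */

        d->sign    = w & 0x80000000u;
        d->mant_lo = 0;

        if (e == 0 && frac == 0) {          /* +/-0.0                   */
            d->mant_hi = 0;
            d->exp     = INT32_MIN;         /* 0x80000000 marks zero    */
        } else if (e == 255) {              /* infinity or NaN          */
            d->mant_hi = frac << 9;         /* 0 for inf, else NaN      */
            d->exp     = INT32_MAX;         /* 0x7fffffff marks inf/NaN */
        } else if (e == 0) {                /* denormal: normalize      */
            d->mant_hi = frac << 9;
            d->exp     = -127;
            while (!(d->mant_hi & 0x80000000u)) {
                d->mant_hi <<= 1;
                d->exp--;
            }
        } else {                            /* normal number            */
            d->mant_hi = 0x80000000u | (frac << 8);  /* implicit 1 back */
            d->exp     = (int32_t)e - 127;           /* remove bias     */
        }
    }

CPDT_store_single reverses this: it rounds to nearest by adding 1<<7 to the high mantissa word before truncating to 24 bits, re-applies the bias of 127, denormalizes results with exponents of -127 or below (flushing values too small even for a denormal to zero) and turns exponents of 128 or more into infinity.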