diff -Nru linux_v18c/arch/arm/nwfpe/ARM-gcc.h linux/arch/arm/nwfpe/ARM-gcc.h
--- linux_v18c/arch/arm/nwfpe/ARM-gcc.h	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/ARM-gcc.h	2006-03-29 10:59:57.000000000 +0900
@@ -16,7 +16,7 @@
 to the same as `int'.
 -------------------------------------------------------------------------------
 */
-typedef char flag;
+typedef int flag;
 typedef unsigned char uint8;
 typedef signed char int8;
 typedef int uint16;
@@ -70,51 +70,3 @@
 
 */
 #define INLINE extern __inline__
-
-/* For use as a GCC soft-float library we need some special function names. */
-
-#ifdef __LIBFLOAT__
-
-/* Some 32-bit ops can be mapped straight across by just changing the name. */
-#define float32_add __addsf3
-#define float32_sub __subsf3
-#define float32_mul __mulsf3
-#define float32_div __divsf3
-#define int32_to_float32 __floatsisf
-#define float32_to_int32_round_to_zero __fixsfsi
-#define float32_to_uint32_round_to_zero __fixunssfsi
-
-/* These ones go through the glue code.  To avoid namespace pollution
-   we rename the internal functions too. */
-#define float32_eq ___float32_eq
-#define float32_le ___float32_le
-#define float32_lt ___float32_lt
-
-/* All the 64-bit ops have to go through the glue, so we pull the same
-   trick. */
-#define float64_add ___float64_add
-#define float64_sub ___float64_sub
-#define float64_mul ___float64_mul
-#define float64_div ___float64_div
-#define int32_to_float64 ___int32_to_float64
-#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero
-#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero
-#define float64_to_float32 ___float64_to_float32
-#define float32_to_float64 ___float32_to_float64
-#define float64_eq ___float64_eq
-#define float64_le ___float64_le
-#define float64_lt ___float64_lt
-
-#if 0
-#define float64_add __adddf3
-#define float64_sub __subdf3
-#define float64_mul __muldf3
-#define float64_div __divdf3
-#define int32_to_float64 __floatsidf
-#define float64_to_int32_round_to_zero __fixdfsi
-#define float64_to_uint32_round_to_zero __fixunsdfsi
-#define float64_to_float32 __truncdfsf2
-#define float32_to_float64 __extendsfdf2
-#endif
-
-#endif
diff -Nru linux_v18c/arch/arm/nwfpe/Makefile linux/arch/arm/nwfpe/Makefile
--- linux_v18c/arch/arm/nwfpe/Makefile	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/Makefile	2006-04-08 14:50:43.000000000 +0900
@@ -17,12 +17,13 @@
 obj-$(CONFIG_FPE_NWFPE) += nwfpe.o
 
 nwfpe-objs := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
-	fpmodule.o fpopcode.o softfloat.o \
-	single_cpdo.o double_cpdo.o
+	fpmodule.o fpopcode.o softfloat.o uaccsess_nwfpe.o
 
-ifeq ($(CONFIG_FPE_NWFPE_XP),y)
-nwfpe-objs += extended_cpdo.o
-endif
+CFLAGS_fpa11.o := -fomit-frame-pointer
+CFLAGS_fpa11_cpdo.o := -fomit-frame-pointer
+CFLAGS_fpa11_cpdt.o := -fomit-frame-pointer
+CFLAGS_fpa11_cprt.o := -fomit-frame-pointer
+CFLAGS_softfloat.o := -fomit-frame-pointer
 
 ifeq ($(CONFIG_CPU_26),y)
 nwfpe-objs += entry26.o
diff -Nru linux_v18c/arch/arm/nwfpe/double_cpdo.c linux/arch/arm/nwfpe/double_cpdo.c
--- linux_v18c/arch/arm/nwfpe/double_cpdo.c	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/double_cpdo.c	2006-04-08 00:03:35.000000000 +0900
@@ -19,10 +19,6 @@
     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
-#include "fpa11.h"
-#include "softfloat.h"
-#include "fpopcode.h"
-
 union float64_components {
 	float64 f64;
 	unsigned int i[2];
 };
@@ -40,37 +36,22 @@
 float64 float64_pow(float64 rFn, float64 rFm);
 float64 float64_pol(float64 rFn, float64 rFm);
 
-static float64 float64_rsf(float64 rFn, float64 rFm)
+static inline float64 float64_rsf(float64 rFn, float64 rFm)
 {
 	return float64_sub(rFm, rFn);
 }
 
-static float64 float64_rdv(float64 rFn, float64 rFm)
+static inline float64 float64_rdv(float64 rFn, float64 rFm)
 {
 	return float64_div(rFm, rFn);
 }
 
-static float64 (*const dyadic_double[16])(float64 rFn, float64 rFm) = {
-	[ADF_CODE >> 20] = float64_add,
-	[MUF_CODE >> 20] = float64_mul,
-	[SUF_CODE >> 20] = float64_sub,
-	[RSF_CODE >> 20] = float64_rsf,
-	[DVF_CODE >> 20] = float64_div,
-	[RDF_CODE >> 20] = float64_rdv,
-	[RMF_CODE >> 20] = float64_rem,
-
-	/* strictly, these opcodes should not be implemented */
-	[FML_CODE >> 20] = float64_mul,
-	[FDV_CODE >> 20] = float64_div,
-	[FRD_CODE >> 20] = float64_rdv,
-};
-
-static float64 float64_mvf(float64 rFm)
+static inline float64 float64_mvf(float64 rFm)
 {
 	return rFm;
 }
 
-static float64 float64_mnf(float64 rFm)
+static inline float64 float64_mnf(float64 rFm)
 {
 	union float64_components u;
@@ -80,7 +61,7 @@
 	return u.f64;
 }
 
-static float64 float64_abs(float64 rFm)
+static inline float64 float64_abs(float64 rFm)
 {
 	union float64_components u;
@@ -90,19 +71,8 @@
 	return u.f64;
 }
 
-static float64 (*const monadic_double[16])(float64 rFm) = {
-	[MVF_CODE >> 20] = float64_mvf,
-	[MNF_CODE >> 20] = float64_mnf,
-	[ABS_CODE >> 20] = float64_abs,
-	[RND_CODE >> 20] = float64_round_to_int,
-	[URD_CODE >> 20] = float64_round_to_int,
-	[SQT_CODE >> 20] = float64_sqrt,
-	[NRM_CODE >> 20] = float64_mvf,
-};
-
-unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd)
+static inline unsigned int DoubleCPDO(FPA11 *fpa11, const unsigned int opcode, FPREG * rFd, const unsigned int nDest)
 {
-	FPA11 *fpa11 = GET_FPA11();
 	float64 rFm;
 	unsigned int Fm, opc_mask_shift;
@@ -124,7 +94,7 @@
 		}
 	}
 
-	opc_mask_shift = (opcode & MASK_ARITHMETIC_OPCODE) >> 20;
+	opc_mask_shift = (opcode >> 20) & (MASK_ARITHMETIC_OPCODE >> 20);
 	if (!MONADIC_INSTRUCTION(opcode)) {
 		unsigned int Fn = getFn(opcode);
 		float64 rFn;
@@ -142,18 +112,104 @@
 			return 0;
 		}
 
-		if (dyadic_double[opc_mask_shift]) {
-			rFd->fDouble = dyadic_double[opc_mask_shift](rFn, rFm);
-		} else {
+		switch (opc_mask_shift) {
+		case ADF_CODE >> 20:
+			rFd->fDouble = float64_add(rFn, rFm);
+			break;
+
+		case MUF_CODE >> 20:
+			rFd->fDouble = float64_mul(rFn, rFm);
+			break;
+
+		case SUF_CODE >> 20:
+			rFd->fDouble = float64_sub(rFn, rFm);
+			break;
+
+		case RSF_CODE >> 20:
+			rFd->fDouble = float64_rsf(rFn, rFm);
+			break;
+
+		case DVF_CODE >> 20:
+			rFd->fDouble = float64_div(rFn, rFm);
+			break;
+
+		case RDF_CODE >> 20:
+			rFd->fDouble = float64_rdv(rFn, rFm);
+			break;
+
+		case RMF_CODE >> 20:
+			rFd->fDouble = float64_rem(rFn, rFm);
+			break;
+
+		case FML_CODE >> 20:
+			rFd->fDouble = float64_mul(rFn, rFm);
+			break;
+
+		case FDV_CODE >> 20:
+			rFd->fDouble = float64_div(rFn, rFm);
+			break;
+
+		case FRD_CODE >> 20:
+			rFd->fDouble = float64_rdv(rFn, rFm);
+			break;
+
+		default:
 			return 0;
 		}
 	} else {
-		if (monadic_double[opc_mask_shift]) {
-			rFd->fDouble = monadic_double[opc_mask_shift](rFm);
-		} else {
+		switch (opc_mask_shift) {
+		case MVF_CODE >> 20:
+			rFd->fDouble = float64_mvf(rFm);
+			break;
+
+		case MNF_CODE >> 20:
+			rFd->fDouble = float64_mnf(rFm);
+			break;
+
+		case ABS_CODE >> 20:
+			rFd->fDouble = float64_abs(rFm);
+			break;
+
+		case RND_CODE >> 20:
+			rFd->fDouble = float64_round_to_int(rFm);
+			break;
+
+		case URD_CODE >> 20:
+			rFd->fDouble = float64_round_to_int(rFm);
+			break;
+
+		case SQT_CODE >> 20:
+			rFd->fDouble = float64_sqrt(rFm);
+			break;
+
+		case NRM_CODE >> 20:
+			rFd->fDouble = float64_mvf(rFm);
+			break;
+
+		default:
 			return 0;
 		}
 	}
+	/* The CPDO functions used to always set the destination type
+	   to be the same as their working size. */
+
+	/* If the operation succeeded, check to see if the result in the
+	   destination register is the correct size.  If not force it
+	   to be. */
+
+	fpa11->fType[getFd(opcode)] = nDest;
+
+#ifdef CONFIG_FPE_NWFPE_XP
+	if (nDest != typeDouble) {
+		if (nDest == typeSingle)
+			rFd->fSingle = float64_to_float32(rFd->fDouble);
+		else
+			rFd->fExtended = float64_to_floatx80(rFd->fDouble);
+	}
+#else
+	if (nDest != typeDouble)
+		rFd->fSingle = float64_to_float32(rFd->fDouble);
+#endif
 	return 1;
 }
diff -Nru linux_v18c/arch/arm/nwfpe/entry.S linux/arch/arm/nwfpe/entry.S
--- linux_v18c/arch/arm/nwfpe/entry.S	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/entry.S	2006-04-06 09:13:25.000000000 +0900
@@ -68,45 +68,78 @@
 	instructions to allow the emulator to spread the cost of the
 	trap over several floating point instructions.
 */
 
+	.text
 	.globl	nwfpe_enter
 nwfpe_enter:
 	mov	r4, lr			@ save the failure-return addresses
-	ldr	ip, [r10, #112]		@ get init_flag
 	mov	sl, sp			@ we access the registers via 'sl'
 
 	ldr	r5, [sp, #60]		@ get contents of PC;
-	cmp	ip, #0
-	bleq	nwfpe_init_fpa
 	sub	r8, r5, #4
 .Lx1:	ldrt	r0, [r8]		@ get actual instruction into r0
 emulate:
-	bl	EmulateAll		@ emulate the instruction
+	and	r3, r0, #0xF00
+	cmp	r3, #0x200
+	cmpne	r3, #0x100
+	movne	pc, r4			@ no, return failure
+	and	r3, r0, #0x0E000000
+	cmp	r3, #0x0E000000
+	bne	emulate_cpdt
+	tst	r0, #0x10
+	adr	lr, emulate_end
+	beq	EmulateCPDO
+	b	EmulateCPRT
+
+emulate_cpdt:
+	cmp	r3, #0x0C000000
+	movne	pc, r4			@ no, return failure
+	bl	EmulateCPDT
+
+emulate_end:
 	cmp	r0, #0			@ was emulation successful
 	moveq	pc, r4			@ no, return failure
 
 next:
 .Lx2:	ldrt	r6, [r5], #4		@ get the next instruction and
 					@ increment PC
-
-	and	r2, r6, #0x0F000000	@ test for FP insns
+	and	r2, r6, #0x0E000000	@ test for FP insns
 	teq	r2, #0x0C000000
-	teqne	r2, #0x0D000000
 	teqne	r2, #0x0E000000
 	movne	pc, r9			@ return ok if not a fp insn
 
 	str	r5, [sp, #60]		@ update PC copy in regs
 
-	mov	r0, r6			@ save a copy
+	adr	r0, fp_cond		@ check condition of next instruction
 	ldr	r1, [sp, #64]		@ fetch the condition codes
-	bl	checkCondition		@ check the condition
-	cmp	r0, #0			@ r0 = 0 ==> condition failed
-
-	@ if condition code failed to match, next insn
+	mov	r2, r6, lsr #28		@ r6 is opcode
+	mov	r1, r1, lsr #28		@ r1 is condition codes
+	ldr	r0, [r0, r2, lsl #2]
+	mov	r0, r0, lsr r1
+	tst	r0, #1
 	beq	next			@ get the next instruction;
 	mov	r0, r6			@ prepare for EmulateAll()
 	b	emulate			@ if r0 != 0, goto EmulateAll
 
+	.align	2
+fp_cond:
+	.word	0xf0f0			@ eq
+	.word	0x0f0f			@ ne
+	.word	0xcccc			@ cs
+	.word	0x3333			@ cc
+	.word	0xff00			@ mi
+	.word	0x00ff			@ pl
+	.word	0xaaaa			@ vs
+	.word	0x5555			@ vc
+	.word	0x0c0c			@ hi
+	.word	0xf3f3			@ ls
+	.word	0xaa55			@ ge
+	.word	0x55aa			@ lt
+	.word	0x0a05			@ gt
+	.word	0xf5fa			@ le
+	.word	0xffff			@ al
+	.word	0x0000			@ nv
+
 @ We need to be prepared for the instructions at .Lx1 and .Lx2
 @ to fault.  Emit the appropriate exception gunk to fix things up.
 @ ??? For some reason, faults can happen at .Lx2 even with a
diff -Nru linux_v18c/arch/arm/nwfpe/extended_cpdo.c linux/arch/arm/nwfpe/extended_cpdo.c
--- linux_v18c/arch/arm/nwfpe/extended_cpdo.c	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/extended_cpdo.c	2006-04-08 00:03:53.000000000 +0900
@@ -19,10 +19,6 @@
     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
-#include "fpa11.h"
-#include "softfloat.h"
-#include "fpopcode.h"
-
 floatx80 floatx80_exp(floatx80 Fm);
 floatx80 floatx80_ln(floatx80 Fm);
 floatx80 floatx80_sin(floatx80 rFm);
@@ -87,9 +83,8 @@
 	[NRM_CODE >> 20] = floatx80_mvf,
 };
 
-unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd)
+static inline unsigned int ExtendedCPDO(FPA11 *fpa11, const unsigned int opcode, FPREG * rFd, const unsigned int nDest)
 {
-	FPA11 *fpa11 = GET_FPA11();
 	floatx80 rFm;
 	unsigned int Fm, opc_mask_shift;
@@ -150,5 +145,21 @@
 		}
 	}
 
+	/* The CPDO functions used to always set the destination type
+	   to be the same as their working size. */
+
+	/* If the operation succeeded, check to see if the result in the
+	   destination register is the correct size.  If not force it
+	   to be. */
+
+	fpa11->fType[getFd(opcode)] = nDest;
+
+	if (nDest != typeExtended) {
+		if (nDest == typeSingle)
+			rFd->fSingle = floatx80_to_float32(rFd->fExtended);
+		else
+			rFd->fDouble = floatx80_to_float64(rFd->fExtended);
+	}
+
 	return 1;
 }
diff -Nru linux_v18c/arch/arm/nwfpe/fpa11.c linux/arch/arm/nwfpe/fpa11.c
--- linux_v18c/arch/arm/nwfpe/fpa11.c	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/fpa11.c	2006-03-29 10:59:57.000000000 +0900
@@ -28,16 +28,10 @@
 #include 
 
-/* forward declarations */
-unsigned int EmulateCPDO(const unsigned int);
-unsigned int EmulateCPDT(const unsigned int);
-unsigned int EmulateCPRT(const unsigned int);
-
 /* Reset the FPA11 chip.  Called to initialize and reset the emulator. */
-void resetFPA11(void)
+static void resetFPA11(FPA11 *fpa11)
 {
 	int i;
-	FPA11 *fpa11 = GET_FPA11();
 
 	/* initialize the register type array */
 	for (i = 0; i <= 7; i++) {
@@ -92,40 +86,15 @@
 #endif
 }
 
-void nwfpe_init_fpa(void)
+void nwfpe_init_fpa(union fp_state *fp)
 {
-	FPA11 *fpa11 = GET_FPA11();
+	FPA11 *fpa11 = (FPA11 *)fp;
 #ifdef NWFPE_DEBUG
 	printk("NWFPE: setting up state.\n");
 #endif
-	resetFPA11();
+	memset(fpa11, 0, sizeof(FPA11));
+	resetFPA11(fpa11);
 	SetRoundingMode(ROUND_TO_NEAREST);
 	SetRoundingPrecision(ROUND_EXTENDED);
 	fpa11->initflag = 1;
 }
-
-/* Emulate the instruction in the opcode. */
-unsigned int EmulateAll(const unsigned int opcode)
-{
-#ifdef NWFPE_DEBUG
-	printk("NWFPE: emulating opcode %08x\n", opcode);
-#endif
-
-	if (TEST_OPCODE(opcode, MASK_CPRT)) {
-		/* Emulate conversion opcodes. */
-		/* Emulate register transfer opcodes. */
-		/* Emulate comparison opcodes. */
-		return EmulateCPRT(opcode);
-	} else if (TEST_OPCODE(opcode, MASK_CPDO)) {
-		/* Emulate monadic arithmetic opcodes. */
-		/* Emulate dyadic arithmetic opcodes. */
-		return EmulateCPDO(opcode);
-	} else if (TEST_OPCODE(opcode, MASK_CPDT)) {
-		/* Emulate load/store opcodes. */
-		/* Emulate load/store multiple opcodes. */
-		return EmulateCPDT(opcode);
-	}
-
-	/* Invalid instruction detected.  Return FALSE. */
-	return 0;
-}
diff -Nru linux_v18c/arch/arm/nwfpe/fpa11.h linux/arch/arm/nwfpe/fpa11.h
--- linux_v18c/arch/arm/nwfpe/fpa11.h	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/fpa11.h	2006-04-08 15:04:37.000000000 +0900
@@ -86,7 +86,6 @@
 	   initialised.
*/ } FPA11; -extern void resetFPA11(void); extern void SetRoundingMode(const unsigned int); extern void SetRoundingPrecision(const unsigned int); diff -Nru linux_v18c/arch/arm/nwfpe/fpa11_cpdo.c linux/arch/arm/nwfpe/fpa11_cpdo.c --- linux_v18c/arch/arm/nwfpe/fpa11_cpdo.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/fpa11_cpdo.c 2006-03-29 10:59:57.000000000 +0900 @@ -22,16 +22,19 @@ #include "fpa11.h" #include "fpopcode.h" +#include "softfloat.h" -unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd); -unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd); -unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd); +#include "single_cpdo.c" +#include "double_cpdo.c" +#ifdef CONFIG_FPE_NWFPE_XP +#include "extended_cpdo.c" +#endif unsigned int EmulateCPDO(const unsigned int opcode) { FPA11 *fpa11 = GET_FPA11(); FPREG *rFd; - unsigned int nType, nDest, nRc; + unsigned int nType, nDest, Fm; /* Get the destination size. If not valid let Linux perform an invalid instruction trap. */ @@ -52,7 +55,7 @@ nType = fpa11->fType[getFn(opcode)]; if (!CONSTANT_FM(opcode)) { - register unsigned int Fm = getFm(opcode); + Fm = getFm(opcode); if (nType < fpa11->fType[Fm]) { nType = fpa11->fType[Fm]; } @@ -62,70 +65,17 @@ switch (nType) { case typeSingle: - nRc = SingleCPDO(opcode, rFd); - break; + return SingleCPDO(fpa11, opcode, rFd, nDest); + case typeDouble: - nRc = DoubleCPDO(opcode, rFd); - break; + return DoubleCPDO(fpa11, opcode, rFd, nDest); + #ifdef CONFIG_FPE_NWFPE_XP case typeExtended: - nRc = ExtendedCPDO(opcode, rFd); - break; + return ExtendedCPDO(fpa11, opcode, rFd, nDest); #endif - default: - nRc = 0; - } - - /* The CPDO functions used to always set the destination type - to be the same as their working size. */ - - if (nRc != 0) { - /* If the operation succeeded, check to see if the result in the - destination register is the correct size. If not force it - to be. 
*/ - fpa11->fType[getFd(opcode)] = nDest; - -#ifdef CONFIG_FPE_NWFPE_XP - if (nDest != nType) { - switch (nDest) { - case typeSingle: - { - if (typeDouble == nType) - rFd->fSingle = float64_to_float32(rFd->fDouble); - else - rFd->fSingle = floatx80_to_float32(rFd->fExtended); - } - break; - - case typeDouble: - { - if (typeSingle == nType) - rFd->fDouble = float32_to_float64(rFd->fSingle); - else - rFd->fDouble = floatx80_to_float64(rFd->fExtended); - } - break; - - case typeExtended: - { - if (typeSingle == nType) - rFd->fExtended = float32_to_floatx80(rFd->fSingle); - else - rFd->fExtended = float64_to_floatx80(rFd->fDouble); - } - break; - } - } -#else - if (nDest != nType) { - if (nDest == typeSingle) - rFd->fSingle = float64_to_float32(rFd->fDouble); - else - rFd->fDouble = float32_to_float64(rFd->fSingle); - } -#endif + default: + return 0; } - - return nRc; } diff -Nru linux_v18c/arch/arm/nwfpe/fpa11_cpdt.c linux/arch/arm/nwfpe/fpa11_cpdt.c --- linux_v18c/arch/arm/nwfpe/fpa11_cpdt.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/fpa11_cpdt.c 2006-04-08 15:53:01.000000000 +0900 @@ -26,13 +26,41 @@ #include "fpmodule.h" #include "fpmodule.inl" -#include +int get_user_nwfpe(unsigned int, unsigned int *); +int put_user_nwfpe(unsigned int, unsigned int *); + +#define get_user_nwfpe(x, p) \ +({ \ + const register unsigned int *__p asm("r0") = (p); \ + register unsigned int __r1 asm("r1"); \ + register int __e asm("r0"); \ + __asm__ __volatile__ ( \ + "bl __get_user_nwfpe" \ + : "=&r"(__e), "=r"(__r1) \ + : "0"(__p) \ + : "lr", "cc"); \ + x = __r1; \ + __e; \ +}) + +#define put_user_nwfpe(x, p) \ +({ \ + const register typeof(*(p)) __r1 asm("r1") = (x); \ + const register typeof(*(p)) *__p asm("r0") = (p); \ + register int __e asm("r0"); \ + __asm__ __volatile__ ( \ + "bl __put_user_nwfpe" \ + : "=&r" (__e) \ + : "0" (__p), "r" (__r1) \ + : "lr", "cc"); \ + __e; \ +}) static inline void loadSingle(const unsigned int Fn, const unsigned int *pMem) { FPA11 *fpa11 = GET_FPA11(); fpa11->fType[Fn] = typeSingle; - get_user(fpa11->fpreg[Fn].fSingle, pMem); + get_user_nwfpe(fpa11->fpreg[Fn].fSingle, pMem); } static inline void loadDouble(const unsigned int Fn, const unsigned int *pMem) @@ -41,8 +69,8 @@ unsigned int *p; p = (unsigned int *) &fpa11->fpreg[Fn].fDouble; fpa11->fType[Fn] = typeDouble; - get_user(p[0], &pMem[1]); - get_user(p[1], &pMem[0]); /* sign & exponent */ + get_user_nwfpe(p[0], &pMem[1]); + get_user_nwfpe(p[1], &pMem[0]); /* sign & exponent */ } #ifdef CONFIG_FPE_NWFPE_XP @@ -52,9 +80,9 @@ unsigned int *p; p = (unsigned int *) &fpa11->fpreg[Fn].fExtended; fpa11->fType[Fn] = typeExtended; - get_user(p[0], &pMem[0]); /* sign & exponent */ - get_user(p[1], &pMem[2]); /* ls bits */ - get_user(p[2], &pMem[1]); /* ms bits */ + get_user_nwfpe(p[0], &pMem[0]); /* sign & exponent */ + get_user_nwfpe(p[1], &pMem[2]); /* ls bits */ + get_user_nwfpe(p[2], &pMem[1]); /* ms bits */ } #endif @@ -65,26 +93,22 @@ unsigned long x; p = (unsigned int *) &(fpa11->fpreg[Fn]); - get_user(x, &pMem[0]); + get_user_nwfpe(x, &pMem[0]); fpa11->fType[Fn] = (x >> 14) & 0x00000003; switch (fpa11->fType[Fn]) { case typeSingle: case typeDouble: - { - get_user(p[0], &pMem[2]); /* Single */ - get_user(p[1], &pMem[1]); /* double msw */ - p[2] = 0; /* empty */ - } + get_user_nwfpe(p[0], &pMem[2]); /* Single */ + get_user_nwfpe(p[1], &pMem[1]); /* double msw */ + p[2] = 0; /* empty */ break; #ifdef CONFIG_FPE_NWFPE_XP case typeExtended: - { - get_user(p[1], &pMem[2]); - get_user(p[2], &pMem[1]); /* msw 
*/ - p[0] = (x & 0x80003fff); - } + get_user_nwfpe(p[1], &pMem[2]); + get_user_nwfpe(p[2], &pMem[1]); /* msw */ + p[0] = (x & 0x80003fff); break; #endif } @@ -111,7 +135,7 @@ val = fpa11->fpreg[Fn].fSingle; } - put_user(p[0], pMem); + put_user_nwfpe(p[0], pMem); } static inline void storeDouble(const unsigned int Fn, unsigned int *pMem) @@ -134,8 +158,8 @@ default: val = fpa11->fpreg[Fn].fDouble; } - put_user(p[1], &pMem[0]); /* msw */ - put_user(p[0], &pMem[1]); /* lsw */ + put_user_nwfpe(p[1], &pMem[0]); /* msw */ + put_user_nwfpe(p[0], &pMem[1]); /* lsw */ } #ifdef CONFIG_FPE_NWFPE_XP @@ -158,9 +182,9 @@ val = fpa11->fpreg[Fn].fExtended; } - put_user(p[0], &pMem[0]); /* sign & exp */ - put_user(p[1], &pMem[2]); - put_user(p[2], &pMem[1]); /* msw */ + put_user_nwfpe(p[0], &pMem[0]); /* sign & exp */ + put_user_nwfpe(p[1], &pMem[2]); + put_user_nwfpe(p[2], &pMem[1]); /* msw */ } #endif @@ -175,28 +199,24 @@ switch (nType) { case typeSingle: case typeDouble: - { - put_user(p[0], &pMem[2]); /* single */ - put_user(p[1], &pMem[1]); /* double msw */ - put_user(nType << 14, &pMem[0]); - } + put_user_nwfpe(p[0], &pMem[2]); /* single */ + put_user_nwfpe(p[1], &pMem[1]); /* double msw */ + put_user_nwfpe(nType << 14, &pMem[0]); break; #ifdef CONFIG_FPE_NWFPE_XP case typeExtended: - { - put_user(p[2], &pMem[1]); /* msw */ - put_user(p[1], &pMem[2]); - put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]); - } + put_user_nwfpe(p[2], &pMem[1]); /* msw */ + put_user_nwfpe(p[1], &pMem[2]); + put_user_nwfpe((p[0] & 0x80003fff) | (nType << 14), &pMem[0]); break; #endif } } -unsigned int PerformLDF(const unsigned int opcode) +static /*inline*/ unsigned int PerformLDF(const unsigned int opcode) { - unsigned int *pBase, *pAddress, *pFinal, nRc = 1, + unsigned int i, *pBase, *pAddress, *pFinal, nRc = 1, write_back = WRITE_BACK(opcode); pBase = (unsigned int *) readRegister(getRn(opcode)); @@ -206,16 +226,18 @@ } pFinal = pBase; + i = getOffset(opcode); if (BIT_UP_SET(opcode)) - pFinal += getOffset(opcode); + pFinal += i; else - pFinal -= getOffset(opcode); + pFinal -= i; if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; +#ifdef CONFIG_FPE_NWFPE_XP switch (opcode & MASK_TRANSFER_LENGTH) { case TRANSFER_SINGLE: loadSingle(getFd(opcode), pAddress); @@ -223,23 +245,31 @@ case TRANSFER_DOUBLE: loadDouble(getFd(opcode), pAddress); break; -#ifdef CONFIG_FPE_NWFPE_XP case TRANSFER_EXTENDED: loadExtended(getFd(opcode), pAddress); break; -#endif default: nRc = 0; } +#else + if (opcode & TRANSFER_EXTENDED) { + nRc = 0; + } else { + if (opcode & TRANSFER_DOUBLE) + loadDouble(getFd(opcode), pAddress); + else + loadSingle(getFd(opcode), pAddress); + } +#endif if (write_back) writeRegister(getRn(opcode), (unsigned int) pFinal); return nRc; } -unsigned int PerformSTF(const unsigned int opcode) +static /*inline*/ unsigned int PerformSTF(const unsigned int opcode) { - unsigned int *pBase, *pAddress, *pFinal, nRc = 1, + unsigned int i, *pBase, *pAddress, *pFinal, nRc = 1, write_back = WRITE_BACK(opcode); SetRoundingMode(ROUND_TO_NEAREST); @@ -251,16 +281,18 @@ } pFinal = pBase; + i = getOffset(opcode); if (BIT_UP_SET(opcode)) - pFinal += getOffset(opcode); + pFinal += i; else - pFinal -= getOffset(opcode); + pFinal -= i; if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; +#ifdef CONFIG_FPE_NWFPE_XP switch (opcode & MASK_TRANSFER_LENGTH) { case TRANSFER_SINGLE: storeSingle(getFd(opcode), pAddress); @@ -268,21 +300,29 @@ case TRANSFER_DOUBLE: storeDouble(getFd(opcode), pAddress); break; -#ifdef 
CONFIG_FPE_NWFPE_XP
 	case TRANSFER_EXTENDED:
 		storeExtended(getFd(opcode), pAddress);
 		break;
-#endif
 	default:
 		nRc = 0;
 	}
+#else
+	if (opcode & TRANSFER_EXTENDED) {
+		nRc = 0;
+	} else {
+		if (opcode & TRANSFER_DOUBLE)
+			storeDouble(getFd(opcode), pAddress);
+		else
+			storeSingle(getFd(opcode), pAddress);
+	}
+#endif
 
 	if (write_back)
 		writeRegister(getRn(opcode), (unsigned int) pFinal);
 	return nRc;
 }
 
-unsigned int PerformLFM(const unsigned int opcode)
+static /*inline*/ unsigned int PerformLFM(const unsigned int opcode)
 {
 	unsigned int i, Fd, *pBase, *pAddress, *pFinal,
 	    write_back = WRITE_BACK(opcode);
@@ -294,10 +334,11 @@
 	}
 
 	pFinal = pBase;
+	i = getOffset(opcode);
 	if (BIT_UP_SET(opcode))
-		pFinal += getOffset(opcode);
+		pFinal += i;
 	else
-		pFinal -= getOffset(opcode);
+		pFinal -= i;
 
 	if (PREINDEXED(opcode))
 		pAddress = pFinal;
@@ -308,9 +349,7 @@
 	for (i = getRegisterCount(opcode); i > 0; i--) {
 		loadMultiple(Fd, pAddress);
 		pAddress += 3;
-		Fd++;
-		if (Fd == 8)
-			Fd = 0;
+		Fd = (Fd + 1) & 7;
 	}
 
 	if (write_back)
@@ -318,7 +357,7 @@
 	return 1;
 }
 
-unsigned int PerformSFM(const unsigned int opcode)
+static /*inline*/ unsigned int PerformSFM(const unsigned int opcode)
 {
 	unsigned int i, Fd, *pBase, *pAddress, *pFinal,
 	    write_back = WRITE_BACK(opcode);
@@ -330,10 +369,11 @@
 	}
 
 	pFinal = pBase;
+	i = getOffset(opcode);
 	if (BIT_UP_SET(opcode))
-		pFinal += getOffset(opcode);
+		pFinal += i;
 	else
-		pFinal -= getOffset(opcode);
+		pFinal -= i;
 
 	if (PREINDEXED(opcode))
 		pAddress = pFinal;
@@ -344,9 +384,7 @@
 	for (i = getRegisterCount(opcode); i > 0; i--) {
 		storeMultiple(Fd, pAddress);
 		pAddress += 3;
-		Fd++;
-		if (Fd == 8)
-			Fd = 0;
+		Fd = (Fd + 1) & 7;
 	}
 
 	if (write_back)
@@ -356,19 +394,15 @@
 
 unsigned int EmulateCPDT(const unsigned int opcode)
 {
-	unsigned int nRc = 0;
-
-	if (LDF_OP(opcode)) {
-		nRc = PerformLDF(opcode);
-	} else if (LFM_OP(opcode)) {
-		nRc = PerformLFM(opcode);
-	} else if (STF_OP(opcode)) {
-		nRc = PerformSTF(opcode);
-	} else if (SFM_OP(opcode)) {
-		nRc = PerformSFM(opcode);
+	if (getCoprocessorNumber(opcode) == 1) {
+		if (opcode & BIT_LOAD)
+			return PerformLDF(opcode);
+		else
+			return PerformSTF(opcode);
 	} else {
-		nRc = 0;
+		if (opcode & BIT_LOAD)
+			return PerformLFM(opcode);
+		else
+			return PerformSFM(opcode);
 	}
-
-	return nRc;
 }
diff -Nru linux_v18c/arch/arm/nwfpe/fpa11_cprt.c linux/arch/arm/nwfpe/fpa11_cprt.c
--- linux_v18c/arch/arm/nwfpe/fpa11_cprt.c	2006-04-08 17:52:56.000000000 +0900
+++ linux/arch/arm/nwfpe/fpa11_cprt.c	2006-04-08 00:47:32.000000000 +0900
@@ -29,77 +29,38 @@
 #ifdef CONFIG_FPE_NWFPE_XP
 extern flag floatx80_is_nan(floatx80);
 #endif
-extern flag float64_is_nan(float64);
-extern flag float32_is_nan(float32);
 
 void SetRoundingMode(const unsigned int opcode);
 
-unsigned int PerformFLT(const unsigned int opcode);
-unsigned int PerformFIX(const unsigned int opcode);
+static inline unsigned int PerformFLT(const unsigned int opcode);
+static inline unsigned int PerformFIX(const unsigned int opcode);
 
-static unsigned int PerformComparison(const unsigned int opcode);
+static inline unsigned int PerformComparison(const unsigned int opcode);
 
-unsigned int EmulateCPRT(const unsigned int opcode)
-{
-
-	if (opcode & 0x800000) {
-		/* This is some variant of a comparison (PerformComparison
-		   will sort out which one).  Since most of the other CPRT
-		   instructions are oddball cases of some sort or other it
-		   makes sense to pull this out into a fast path.
*/ - return PerformComparison(opcode); - } - - /* Hint to GCC that we'd like a jump table rather than a load of CMPs */ - switch ((opcode & 0x700000) >> 20) { - case FLT_CODE >> 20: - return PerformFLT(opcode); - break; - case FIX_CODE >> 20: - return PerformFIX(opcode); - break; - - case WFS_CODE >> 20: - writeFPSR(readRegister(getRd(opcode))); - break; - case RFS_CODE >> 20: - writeRegister(getRd(opcode), readFPSR()); - break; - - default: - return 0; - } - - return 1; -} - -unsigned int PerformFLT(const unsigned int opcode) +static inline unsigned int PerformFLT(const unsigned int opcode) { FPA11 *fpa11 = GET_FPA11(); + int idx; + SetRoundingMode(opcode); SetRoundingPrecision(opcode); + idx = getFn(opcode); switch (opcode & MASK_ROUNDING_PRECISION) { case ROUND_SINGLE: - { - fpa11->fType[getFn(opcode)] = typeSingle; - fpa11->fpreg[getFn(opcode)].fSingle = int32_to_float32(readRegister(getRd(opcode))); - } + fpa11->fType[idx] = typeSingle; + fpa11->fpreg[idx].fSingle = int32_to_float32(readRegister(getRd(opcode))); break; case ROUND_DOUBLE: - { - fpa11->fType[getFn(opcode)] = typeDouble; - fpa11->fpreg[getFn(opcode)].fDouble = int32_to_float64(readRegister(getRd(opcode))); - } + fpa11->fType[idx] = typeDouble; + fpa11->fpreg[idx].fDouble = int32_to_float64(readRegister(getRd(opcode))); break; #ifdef CONFIG_FPE_NWFPE_XP case ROUND_EXTENDED: - { - fpa11->fType[getFn(opcode)] = typeExtended; - fpa11->fpreg[getFn(opcode)].fExtended = int32_to_floatx80(readRegister(getRd(opcode))); - } + fpa11->fType[idx] = typeExtended; + fpa11->fpreg[idx].fExtended = int32_to_floatx80(readRegister(getRd(opcode))); break; #endif @@ -110,7 +71,7 @@ return 1; } -unsigned int PerformFIX(const unsigned int opcode) +static inline unsigned int PerformFIX(const unsigned int opcode) { FPA11 *fpa11 = GET_FPA11(); unsigned int Fn = getFm(opcode); @@ -119,22 +80,16 @@ switch (fpa11->fType[Fn]) { case typeSingle: - { - writeRegister(getRd(opcode), float32_to_int32(fpa11->fpreg[Fn].fSingle)); - } + writeRegister(getRd(opcode), float32_to_int32(fpa11->fpreg[Fn].fSingle)); break; case typeDouble: - { - writeRegister(getRd(opcode), float64_to_int32(fpa11->fpreg[Fn].fDouble)); - } + writeRegister(getRd(opcode), float64_to_int32(fpa11->fpreg[Fn].fDouble)); break; #ifdef CONFIG_FPE_NWFPE_XP case typeExtended: - { - writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended)); - } + writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended)); break; #endif @@ -146,7 +101,7 @@ } /* This instruction sets the flags N, Z, C, V in the FPSR. */ -static unsigned int PerformComparison(const unsigned int opcode) +static inline unsigned int PerformComparison(const unsigned int opcode) { FPA11 *fpa11 = GET_FPA11(); unsigned int Fn = getFn(opcode), Fm = getFm(opcode); @@ -366,3 +321,37 @@ writeConditionCodes(flags); return 1; } + +unsigned int EmulateCPRT(const unsigned int opcode) +{ + + if (opcode & 0x800000) { + /* This is some variant of a comparison (PerformComparison + will sort out which one). Since most of the other CPRT + instructions are oddball cases of some sort or other it + makes sense to pull this out into a fast path. 
*/ + return PerformComparison(opcode); + } + + /* Hint to GCC that we'd like a jump table rather than a load of CMPs */ + switch ((opcode & 0x700000) >> 20) { + case FLT_CODE >> 20: + return PerformFLT(opcode); + break; + case FIX_CODE >> 20: + return PerformFIX(opcode); + break; + + case WFS_CODE >> 20: + writeFPSR(readRegister(getRd(opcode))); + break; + case RFS_CODE >> 20: + writeRegister(getRd(opcode), readFPSR()); + break; + + default: + return 0; + } + + return 1; +} diff -Nru linux_v18c/arch/arm/nwfpe/fpmodule.c linux/arch/arm/nwfpe/fpmodule.c --- linux_v18c/arch/arm/nwfpe/fpmodule.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/fpmodule.c 2006-03-29 10:59:57.000000000 +0900 @@ -68,12 +68,15 @@ /* external declarations for saved kernel symbols */ extern void (*kern_fp_enter)(void); +extern void (*fp_init)(union fp_state *); /* Original value of fp_enter from kernel before patched by fpe_init. */ static void (*orig_fp_enter)(void); +static void (*orig_fp_init)(union fp_state *); /* forward declarations */ extern void nwfpe_enter(void); +extern void nwfpe_init_fpa(union fp_state *fp); #ifdef MODULE /* @@ -112,7 +115,9 @@ /* Save pointer to the old FP handler and then patch ourselves in */ orig_fp_enter = kern_fp_enter; + orig_fp_init = fp_init; kern_fp_enter = nwfpe_enter; + fp_init = nwfpe_init_fpa; return 0; } @@ -121,6 +126,7 @@ { /* Restore the values we saved earlier. */ kern_fp_enter = orig_fp_enter; + fp_init = orig_fp_init; } /* diff -Nru linux_v18c/arch/arm/nwfpe/fpopcode.c linux/arch/arm/nwfpe/fpopcode.c --- linux_v18c/arch/arm/nwfpe/fpopcode.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/fpopcode.c 2006-04-06 09:13:06.000000000 +0900 @@ -60,30 +60,3 @@ 0x3f000000, /* single 0.5 */ 0x41200000 /* single 10.0 */ }; - -/* condition code lookup table - index into the table is test code: EQ, NE, ... 
LT, GT, AL, NV - bit position in short is condition code: NZCV */ -static const unsigned short aCC[16] = { - 0xF0F0, // EQ == Z set - 0x0F0F, // NE - 0xCCCC, // CS == C set - 0x3333, // CC - 0xFF00, // MI == N set - 0x00FF, // PL - 0xAAAA, // VS == V set - 0x5555, // VC - 0x0C0C, // HI == C set && Z clear - 0xF3F3, // LS == C clear || Z set - 0xAA55, // GE == (N==V) - 0x55AA, // LT == (N!=V) - 0x0A05, // GT == (!Z && (N==V)) - 0xF5FA, // LE == (Z || (N!=V)) - 0xFFFF, // AL always - 0 // NV -}; - -unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes) -{ - return (aCC[opcode >> 28] >> (ccodes >> 28)) & 1; -} diff -Nru linux_v18c/arch/arm/nwfpe/fpopcode.h linux/arch/arm/nwfpe/fpopcode.h --- linux_v18c/arch/arm/nwfpe/fpopcode.h 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/fpopcode.h 2006-04-07 11:47:39.000000000 +0900 @@ -387,68 +387,16 @@ return float32Constant[nIndex]; } -static inline unsigned int getTransferLength(const unsigned int opcode) -{ - unsigned int nRc; - - switch (opcode & MASK_TRANSFER_LENGTH) { - case 0x00000000: - nRc = 1; - break; /* single precision */ - case 0x00008000: - nRc = 2; - break; /* double precision */ - case 0x00400000: - nRc = 3; - break; /* extended precision */ - default: - nRc = 0; - } - - return (nRc); -} - static inline unsigned int getRegisterCount(const unsigned int opcode) { - unsigned int nRc; + unsigned int nRc = 0; - switch (opcode & MASK_REGISTER_COUNT) { - case 0x00000000: + if (opcode & 0x00008000) + nRc += 1; + if (opcode & 0x00400000) + nRc += 2; + if (nRc == 0) nRc = 4; - break; - case 0x00008000: - nRc = 1; - break; - case 0x00400000: - nRc = 2; - break; - case 0x00408000: - nRc = 3; - break; - default: - nRc = 0; - } - - return (nRc); -} - -static inline unsigned int getRoundingPrecision(const unsigned int opcode) -{ - unsigned int nRc; - - switch (opcode & MASK_ROUNDING_PRECISION) { - case 0x00000000: - nRc = 1; - break; - case 0x00000080: - nRc = 2; - break; - case 0x00080000: - nRc = 3; - break; - default: - nRc = 0; - } return (nRc); } diff -Nru linux_v18c/arch/arm/nwfpe/single_cpdo.c linux/arch/arm/nwfpe/single_cpdo.c --- linux_v18c/arch/arm/nwfpe/single_cpdo.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/single_cpdo.c 2006-04-08 00:03:43.000000000 +0900 @@ -20,10 +20,6 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include "fpa11.h" -#include "softfloat.h" -#include "fpopcode.h" - float32 float32_exp(float32 Fm); float32 float32_ln(float32 Fm); float32 float32_sin(float32 rFm); @@ -36,59 +32,34 @@ float32 float32_pow(float32 rFn, float32 rFm); float32 float32_pol(float32 rFn, float32 rFm); -static float32 float32_rsf(float32 rFn, float32 rFm) +static inline float32 float32_rsf(float32 rFn, float32 rFm) { return float32_sub(rFm, rFn); } -static float32 float32_rdv(float32 rFn, float32 rFm) +static inline float32 float32_rdv(float32 rFn, float32 rFm) { return float32_div(rFm, rFn); } -static float32 (*const dyadic_single[16])(float32 rFn, float32 rFm) = { - [ADF_CODE >> 20] = float32_add, - [MUF_CODE >> 20] = float32_mul, - [SUF_CODE >> 20] = float32_sub, - [RSF_CODE >> 20] = float32_rsf, - [DVF_CODE >> 20] = float32_div, - [RDF_CODE >> 20] = float32_rdv, - [RMF_CODE >> 20] = float32_rem, - - [FML_CODE >> 20] = float32_mul, - [FDV_CODE >> 20] = float32_div, - [FRD_CODE >> 20] = float32_rdv, -}; - -static float32 float32_mvf(float32 rFm) +static inline float32 float32_mvf(float32 rFm) { return rFm; } -static float32 float32_mnf(float32 rFm) +static inline float32 float32_mnf(float32 rFm) { return rFm ^ 0x80000000; } -static float32 float32_abs(float32 rFm) +static inline float32 float32_abs(float32 rFm) { return rFm & 0x7fffffff; } -static float32 (*const monadic_single[16])(float32 rFm) = { - [MVF_CODE >> 20] = float32_mvf, - [MNF_CODE >> 20] = float32_mnf, - [ABS_CODE >> 20] = float32_abs, - [RND_CODE >> 20] = float32_round_to_int, - [URD_CODE >> 20] = float32_round_to_int, - [SQT_CODE >> 20] = float32_sqrt, - [NRM_CODE >> 20] = float32_mvf, -}; - -unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd) +static inline unsigned int SingleCPDO(FPA11 *fpa11, const unsigned int opcode, FPREG * rFd, const unsigned int nDest) { - FPA11 *fpa11 = GET_FPA11(); - float32 rFm; + float32 rFm, rFn; unsigned int Fm, opc_mask_shift; Fm = getFm(opcode); @@ -100,25 +71,114 @@ return 0; } - opc_mask_shift = (opcode & MASK_ARITHMETIC_OPCODE) >> 20; + opc_mask_shift = (opcode >> 20) & (MASK_ARITHMETIC_OPCODE >> 20); if (!MONADIC_INSTRUCTION(opcode)) { unsigned int Fn = getFn(opcode); - float32 rFn; - if (fpa11->fType[Fn] == typeSingle && - dyadic_single[opc_mask_shift]) { - rFn = fpa11->fpreg[Fn].fSingle; - rFd->fSingle = dyadic_single[opc_mask_shift](rFn, rFm); - } else { + if (fpa11->fType[Fn] != typeSingle) + return 0; + + rFn = fpa11->fpreg[Fn].fSingle; + + switch (opc_mask_shift) { + case ADF_CODE >> 20: + rFd->fSingle = float32_add(rFn, rFm); + break; + + case MUF_CODE >> 20: + rFd->fSingle = float32_mul(rFn, rFm); + break; + + case SUF_CODE >> 20: + rFd->fSingle = float32_sub(rFn, rFm); + break; + + case RSF_CODE >> 20: + rFd->fSingle = float32_rsf(rFn, rFm); + break; + + case DVF_CODE >> 20: + rFd->fSingle = float32_div(rFn, rFm); + break; + + case RDF_CODE >> 20: + rFd->fSingle = float32_rdv(rFn, rFm); + break; + + case RMF_CODE >> 20: + rFd->fSingle = float32_rem(rFn, rFm); + break; + + case FML_CODE >> 20: + rFd->fSingle = float32_mul(rFn, rFm); + break; + + case FDV_CODE >> 20: + rFd->fSingle = float32_div(rFn, rFm); + break; + + case FRD_CODE >> 20: + rFd->fSingle = float32_rdv(rFn, rFm); + break; + + default: return 0; } } else { - if (monadic_single[opc_mask_shift]) { - rFd->fSingle = monadic_single[opc_mask_shift](rFm); - } else { + switch (opc_mask_shift) { + case MVF_CODE >> 20: + rFd->fSingle = float32_mvf(rFm); + break; + + case MNF_CODE >> 20: + rFd->fSingle = float32_mnf(rFm); + break; 
+ + case ABS_CODE >> 20: + rFd->fSingle = float32_abs(rFm); + break; + + case RND_CODE >> 20: + rFd->fSingle = float32_round_to_int(rFm); + break; + + case URD_CODE >> 20: + rFd->fSingle = float32_round_to_int(rFm); + break; + + case SQT_CODE >> 20: + rFd->fSingle = float32_sqrt(rFm); + break; + + case NRM_CODE >> 20: + rFd->fSingle = float32_mvf(rFm); + break; + + default: return 0; } } + /* The CPDO functions used to always set the destination type + to be the same as their working size. */ + + /* If the operation succeeded, check to see if the result in the + destination register is the correct size. If not force it + to be. */ + + fpa11->fType[getFd(opcode)] = nDest; + +#ifdef CONFIG_FPE_NWFPE_XP + if (nDest != typeSingle) { + if (nDest == typeDouble) + rFd->fDouble = float32_to_float64(rFd->fSingle); + else + rFd->fExtended = float32_to_floatx80(rFd->fSingle); + } +#else + if (nDest != typeSingle) + rFd->fDouble = float32_to_float64(rFd->fSingle); +#endif + return 1; } diff -Nru linux_v18c/arch/arm/nwfpe/softfloat-macros linux/arch/arm/nwfpe/softfloat-macros --- linux_v18c/arch/arm/nwfpe/softfloat-macros 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/softfloat-macros 2006-04-06 09:13:06.000000000 +0900 @@ -41,17 +41,23 @@ */ INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) { - bits32 z; - if ( count == 0 ) { - z = a; - } - else if ( count < 32 ) { - z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); - } - else { - z = ( a != 0 ); - } - *zPtr = z; + if ( count == 0 ) + *zPtr = a; + else if ( count < 32 ) + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + else + *zPtr = ( a != 0 ); +} +INLINE void shift32RightJammingFast( bits32 a, int16 count, bits32 *zPtr ) +{ + if ( count < 32 ) + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + else + *zPtr = ( a != 0 ); +} +INLINE void shift32RightJammingConst( bits32 a, int16 count, bits32 *zPtr ) +{ + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); } /* @@ -66,20 +72,23 @@ */ INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) { - bits64 z; - - __asm__("@shift64RightJamming -- start"); - if ( count == 0 ) { - z = a; - } - else if ( count < 64 ) { - z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); - } - else { - z = ( a != 0 ); - } - __asm__("@shift64RightJamming -- end"); - *zPtr = z; + if ( count == 0 ) + *zPtr = a; + else if ( count < 64 ) + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + else + *zPtr = ( a != 0 ); +} +INLINE void shift64RightJammingFast( bits64 a, int16 count, bits64 *zPtr ) +{ + if ( count < 64 ) + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + else + *zPtr = ( a != 0 ); +} +INLINE void shift64RightJammingConst( bits64 a, int16 count, bits64 *zPtr ) +{ + *zPtr = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); } /* @@ -104,29 +113,21 @@ shift64ExtraRightJamming( bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) { - bits64 z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count; + *z1Ptr = a1; + *z0Ptr = a0; + } else if ( count < 64 ) { + *z1Ptr = ( a0<>count; + } else { + if ( count == 64 ) + *z1Ptr = a0 | ( a1 != 0 ); + else + *z1Ptr = ( ( a0 | a1 ) != 0 ); + *z0Ptr = 0; } - else { - if ( count == 64 ) { - z1 = a0 | ( a1 != 0 ); - } - else { - z1 = ( ( a0 | a1 ) != 0 ); - } - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - } /* @@ -142,24 +143,18 @@ shift128Right( bits64 a0, bits64 a1, int16 
count, bits64 *z0Ptr, bits64 *z1Ptr ) { - bits64 z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count ); - z0 = a0>>count; + *z1Ptr = a1; + *z0Ptr = a0; + } else if ( count < 64 ) { + *z1Ptr = ( a0<>count ); + *z0Ptr = a0>>count; + } else { + *z1Ptr = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + *z0Ptr = 0; } - else { - z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - } /* @@ -178,32 +173,23 @@ shift128RightJamming( bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) { - bits64 z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count ) | ( ( a1<>count; - } - else { - if ( count == 64 ) { - z1 = a0 | ( a1 != 0 ); - } - else if ( count < 128 ) { - z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<>count ) | ( ( a1<>count; + } else { + if ( count == 64 ) + *z1Ptr = a0 | ( a1 != 0 ); + else if ( count < 128 ) + *z1Ptr = ( a0>>( count & 63 ) ) | ( ( ( a0<>24 ]; - return shiftCount; + asm("clz %0, %1" : "=r"(ret) : "r"(a) : "cc"); + return ret; } /* diff -Nru linux_v18c/arch/arm/nwfpe/softfloat-specialize linux/arch/arm/nwfpe/softfloat-specialize --- linux_v18c/arch/arm/nwfpe/softfloat-specialize 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/softfloat-specialize 2006-04-06 09:13:06.000000000 +0900 @@ -39,25 +39,6 @@ /* ------------------------------------------------------------------------------- -Raises the exceptions specified by `flags'. Floating-point traps can be -defined here if desired. It is currently not possible for such a trap to -substitute a result value. If traps are not implemented, this routine -should be simply `float_exception_flags |= flags;'. - -ScottB: November 4, 1998 -Moved this function out of softfloat-specialize into fpmodule.c. -This effectively isolates all the changes required for integrating with the -Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying -fpmodule.c to integrate with the NetBSD kernel (I hope!). -------------------------------------------------------------------------------- -void float_raise( int8 flags ) -{ - float_exception_flags |= flags; -} -*/ - -/* -------------------------------------------------------------------------------- Internal canonical NaN format. ------------------------------------------------------------------------------- */ @@ -68,45 +49,12 @@ /* ------------------------------------------------------------------------------- -The pattern for a default generated single-precision NaN. -------------------------------------------------------------------------------- -*/ -#define float32_default_nan 0xFFFFFFFF - -/* -------------------------------------------------------------------------------- -Returns 1 if the single-precision floating-point value `a' is a NaN; -otherwise returns 0. -------------------------------------------------------------------------------- -*/ -flag float32_is_nan( float32 a ) -{ - - return ( 0xFF000000 < (bits32) ( a<<1 ) ); - -} - -/* -------------------------------------------------------------------------------- -Returns 1 if the single-precision floating-point value `a' is a signaling -NaN; otherwise returns 0. 
-------------------------------------------------------------------------------- -*/ -flag float32_is_signaling_nan( float32 a ) -{ - - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); - -} - -/* -------------------------------------------------------------------------------- Returns the result of converting the single-precision floating-point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid exception is raised. ------------------------------------------------------------------------------- */ -static commonNaNT float32ToCommonNaN( float32 a ) +static inline commonNaNT float32ToCommonNaN( float32 a ) { commonNaNT z; @@ -124,7 +72,7 @@ precision floating-point format. ------------------------------------------------------------------------------- */ -static float32 commonNaNToFloat32( commonNaNT a ) +static inline float32 commonNaNToFloat32( commonNaNT a ) { return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); @@ -138,7 +86,7 @@ signaling NaN, the invalid exception is raised. ------------------------------------------------------------------------------- */ -static float32 propagateFloat32NaN( float32 a, float32 b ) +static inline float32 propagateFloat32NaN( float32 a, float32 b ) { flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; @@ -167,19 +115,6 @@ /* ------------------------------------------------------------------------------- -Returns 1 if the double-precision floating-point value `a' is a NaN; -otherwise returns 0. -------------------------------------------------------------------------------- -*/ -flag float64_is_nan( float64 a ) -{ - - return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); - -} - -/* -------------------------------------------------------------------------------- Returns 1 if the double-precision floating-point value `a' is a signaling NaN; otherwise returns 0. ------------------------------------------------------------------------------- @@ -200,7 +135,7 @@ exception is raised. ------------------------------------------------------------------------------- */ -static commonNaNT float64ToCommonNaN( float64 a ) +static inline commonNaNT float64ToCommonNaN( float64 a ) { commonNaNT z; @@ -218,7 +153,7 @@ precision floating-point format. ------------------------------------------------------------------------------- */ -static float64 commonNaNToFloat64( commonNaNT a ) +static inline float64 commonNaNToFloat64( commonNaNT a ) { return @@ -235,7 +170,7 @@ signaling NaN, the invalid exception is raised. ------------------------------------------------------------------------------- */ -static float64 propagateFloat64NaN( float64 a, float64 b ) +static inline float64 propagateFloat64NaN( float64 a, float64 b ) { flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; diff -Nru linux_v18c/arch/arm/nwfpe/softfloat.c linux/arch/arm/nwfpe/softfloat.c --- linux_v18c/arch/arm/nwfpe/softfloat.c 2006-04-08 17:52:56.000000000 +0900 +++ linux/arch/arm/nwfpe/softfloat.c 2006-04-06 09:13:06.000000000 +0900 @@ -29,8 +29,6 @@ */ #include "fpa11.h" -//#include "milieu.h" -//#include "softfloat.h" /* ------------------------------------------------------------------------------- @@ -75,7 +73,8 @@ positive or negative integer is returned. 
------------------------------------------------------------------------------- */ -static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +static inline int32 +roundAndPackInt32( flag zSign, bits64 absZ ) { int8 roundingMode; flag roundNearestEven; @@ -115,52 +114,14 @@ /* ------------------------------------------------------------------------------- -Returns the fraction bits of the single-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -INLINE bits32 extractFloat32Frac( float32 a ) -{ - - return a & 0x007FFFFF; - -} - -/* -------------------------------------------------------------------------------- -Returns the exponent bits of the single-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -INLINE int16 extractFloat32Exp( float32 a ) -{ - - return ( a>>23 ) & 0xFF; - -} - -/* -------------------------------------------------------------------------------- -Returns the sign bit of the single-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -#if 0 /* in softfloat.h */ -INLINE flag extractFloat32Sign( float32 a ) -{ - - return a>>31; - -} -#endif - -/* -------------------------------------------------------------------------------- Normalizes the subnormal single-precision floating-point value represented by the denormalized significand `aSig'. The normalized exponent and significand are stored at the locations pointed to by `zExpPtr' and `zSigPtr', respectively. ------------------------------------------------------------------------------- */ -static void - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +static inline void +normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) { int8 shiftCount; @@ -172,35 +133,6 @@ /* ------------------------------------------------------------------------------- -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a -single-precision floating-point value, returning the result. After being -shifted into the proper positions, the three fields are simply added -together to form the result. This means that any integer portion of `zSig' -will be added into the exponent. Since a properly normalized significand -will have an integer portion equal to 1, the `zExp' input should be 1 less -than the desired result exponent whenever `zSig' is a complete, normalized -significand. -------------------------------------------------------------------------------- -*/ -INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) -{ -#if 0 - float32 f; - __asm__("@ packFloat32 \n\ - mov %0, %1, asl #31 \n\ - orr %0, %2, asl #23 \n\ - orr %0, %3" - : /* no outputs */ - : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig) - : "cc"); - return f; -#else - return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; -#endif -} - -/* -------------------------------------------------------------------------------- Takes an abstract floating-point value having sign `zSign', exponent `zExp', and significand `zSig', and returns the proper single-precision floating- point value corresponding to the abstract input. Ordinarily, the abstract @@ -222,7 +154,8 @@ Binary Floating-point Arithmetic. 
------------------------------------------------------------------------------- */ -static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +static inline float32 +roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) { int8 roundingMode; flag roundNearestEven; @@ -260,7 +193,7 @@ ( float_detect_tininess == float_tininess_before_rounding ) || ( zExp < -1 ) || ( zSig + roundIncrement < 0x80000000 ); - shift32RightJamming( zSig, - zExp, &zSig ); + shift32RightJammingFast( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x7F; if ( isTiny && roundBits ) float_raise( float_flag_underflow ); @@ -284,8 +217,8 @@ point exponent. ------------------------------------------------------------------------------- */ -static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +static inline float32 +normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) { int8 shiftCount; @@ -296,52 +229,14 @@ /* ------------------------------------------------------------------------------- -Returns the fraction bits of the double-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -INLINE bits64 extractFloat64Frac( float64 a ) -{ - - return a & LIT64( 0x000FFFFFFFFFFFFF ); - -} - -/* -------------------------------------------------------------------------------- -Returns the exponent bits of the double-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -INLINE int16 extractFloat64Exp( float64 a ) -{ - - return ( a>>52 ) & 0x7FF; - -} - -/* -------------------------------------------------------------------------------- -Returns the sign bit of the double-precision floating-point value `a'. -------------------------------------------------------------------------------- -*/ -#if 0 /* in softfloat.h */ -INLINE flag extractFloat64Sign( float64 a ) -{ - - return a>>63; - -} -#endif - -/* -------------------------------------------------------------------------------- Normalizes the subnormal double-precision floating-point value represented by the denormalized significand `aSig'. The normalized exponent and significand are stored at the locations pointed to by `zExpPtr' and `zSigPtr', respectively. ------------------------------------------------------------------------------- */ -static void - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +static inline void +normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) { int8 shiftCount; @@ -353,25 +248,6 @@ /* ------------------------------------------------------------------------------- -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a -double-precision floating-point value, returning the result. After being -shifted into the proper positions, the three fields are simply added -together to form the result. This means that any integer portion of `zSig' -will be added into the exponent. Since a properly normalized significand -will have an integer portion equal to 1, the `zExp' input should be 1 less -than the desired result exponent whenever `zSig' is a complete, normalized -significand. 
-------------------------------------------------------------------------------- -*/ -INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) -{ - - return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; - -} - -/* -------------------------------------------------------------------------------- Takes an abstract floating-point value having sign `zSign', exponent `zExp', and significand `zSig', and returns the proper double-precision floating- point value corresponding to the abstract input. Ordinarily, the abstract @@ -393,7 +269,8 @@ Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +static inline float64 +roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) { int8 roundingMode; flag roundNearestEven; @@ -433,7 +310,7 @@ ( float_detect_tininess == float_tininess_before_rounding ) || ( zExp < -1 ) || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); - shift64RightJamming( zSig, - zExp, &zSig ); + shift64RightJammingFast( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x3FF; if ( isTiny && roundBits ) float_raise( float_flag_underflow ); @@ -457,8 +334,8 @@ point exponent. ------------------------------------------------------------------------------- */ -static float64 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +static inline float64 +normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) { int8 shiftCount; @@ -618,7 +495,7 @@ ( float_detect_tininess == float_tininess_before_rounding ) || ( zExp < 0 ) || ( zSig0 <= zSig0 + roundIncrement ); - shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + shift64RightJammingFast( zSig0, 1 - zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; if ( isTiny && roundBits ) float_raise( float_flag_underflow ); @@ -853,7 +730,7 @@ shiftCount = 0xAF - aExp; zSig = aSig; zSig <<= 32; - if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); + if ( 0 < shiftCount ) shift64RightJammingFast( zSig, shiftCount, &zSig ); return roundAndPackInt32( aSign, zSig ); } @@ -861,47 +738,6 @@ /* ------------------------------------------------------------------------------- Returns the result of converting the single-precision floating-point value -`a' to the 32-bit two's complement integer format. The conversion is -performed according to the IEC/IEEE Standard for Binary Floating-point -Arithmetic, except that the conversion is always rounded toward zero. If -`a' is a NaN, the largest positive integer is returned. Otherwise, if the -conversion overflows, the largest integer with the same sign as `a' is -returned. -------------------------------------------------------------------------------- -*/ -int32 float32_to_int32_round_to_zero( float32 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - int32 z; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0x9E; - if ( 0 <= shiftCount ) { - if ( a == 0xCF000000 ) return 0x80000000; - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; - return 0x80000000; - } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig = ( aSig | 0x00800000 )<<8; - z = aSig>>( - shiftCount ); - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - return aSign ? 
- z : z; - -} - -/* -------------------------------------------------------------------------------- -Returns the result of converting the single-precision floating-point value `a' to the double-precision floating-point format. The conversion is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. @@ -1032,7 +868,8 @@ Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +static inline float32 +addFloat32Sigs( float32 a, float32 b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; @@ -1104,7 +941,8 @@ Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +static inline float32 +subFloat32Sigs( float32 a, float32 b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; @@ -1266,7 +1104,7 @@ zExp = aExp + bExp - 0x7F; aSig = ( aSig | 0x00800000 )<<7; bSig = ( bSig | 0x00800000 )<<8; - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + shift64RightJammingConst( ( (bits64) aSig ) * bSig, 32, &zSig64 ); zSig = zSig64; if ( 0 <= (sbits32) ( zSig<<1 ) ) { zSig <<= 1; @@ -1490,7 +1328,7 @@ zSig |= ( rem != 0 ); } } - shift32RightJamming( zSig, 1, &zSig ); + shift32RightJammingConst( zSig, 1, &zSig ); return roundAndPackFloat32( 0, zExp, zSig ); } @@ -1568,82 +1406,6 @@ /* ------------------------------------------------------------------------------- -Returns 1 if the single-precision floating-point value `a' is equal to the -corresponding value `b', and 0 otherwise. The invalid exception is raised -if either operand is a NaN. Otherwise, the comparison is performed -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. -------------------------------------------------------------------------------- -*/ -flag float32_eq_signaling( float32 a, float32 b ) -{ - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); - -} - -/* -------------------------------------------------------------------------------- -Returns 1 if the single-precision floating-point value `a' is less than or -equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -cause an exception. Otherwise, the comparison is performed according to the -IEC/IEEE Standard for Binary Floating-point Arithmetic. -------------------------------------------------------------------------------- -*/ -flag float32_le_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; - //int16 aExp, bExp; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/* -------------------------------------------------------------------------------- -Returns 1 if the single-precision floating-point value `a' is less than -the corresponding value `b', and 0 otherwise. 
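Although the signalling/quiet comparison wrappers above are dropped from softfloat.c, the *_nocheck helpers kept in softfloat.h (see the hunk further down) reuse the same ordering trick on the raw encodings: same-sign values sort like their bit patterns, aSign ^ (a < b) flips the order for negatives, and (bits32)((a | b) << 1) == 0 makes +0 and -0 compare equal. The stand-alone check below restates that logic with names of my own; the NaN screening done by the removed *_quiet versions is deliberately omitted, just as in the *_nocheck variants.

#include <assert.h>

typedef unsigned int bits32;
typedef int flag;

/* Same ordering trick as float32_lt_quiet()/float32_lt_nocheck(),
 * minus the NaN checks; operates on raw IEEE single encodings. */
static flag f32_bits_lt(bits32 a, bits32 b)
{
	flag aSign = a >> 31, bSign = b >> 31;

	if (aSign != bSign)	/* differing signs: a < b unless both are zeros */
		return aSign && ((bits32)((a | b) << 1) != 0);
	return (a != b) && (aSign ^ (a < b));
}

int main(void)
{
	assert(f32_bits_lt(0xBF800000, 0x3F800000));	/* -1.0f <  1.0f */
	assert(f32_bits_lt(0xC0000000, 0xBF800000));	/* -2.0f < -1.0f */
	assert(!f32_bits_lt(0x80000000, 0x00000000));	/* -0.0f not < +0.0f */
	assert(!f32_bits_lt(0x40000000, 0x3F800000));	/*  2.0f not <  1.0f */
	return 0;
}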
Quiet NaNs do not cause an -exception. Otherwise, the comparison is performed according to the IEC/IEEE -Standard for Binary Floating-point Arithmetic. -------------------------------------------------------------------------------- -*/ -flag float32_lt_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/* -------------------------------------------------------------------------------- Returns the result of converting the double-precision floating-point value `a' to the 32-bit two's complement integer format. The conversion is performed according to the IEC/IEEE Standard for Binary Floating-point @@ -1665,7 +1427,7 @@ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); shiftCount = 0x42C - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + if ( 0 < shiftCount ) shift64RightJammingFast( aSig, shiftCount, &aSig ); return roundAndPackInt32( aSign, aSig ); } @@ -1673,125 +1435,6 @@ /* ------------------------------------------------------------------------------- Returns the result of converting the double-precision floating-point value -`a' to the 32-bit two's complement integer format. The conversion is -performed according to the IEC/IEEE Standard for Binary Floating-point -Arithmetic, except that the conversion is always rounded toward zero. If -`a' is a NaN, the largest positive integer is returned. Otherwise, if the -conversion overflows, the largest integer with the same sign as `a' is -returned. -------------------------------------------------------------------------------- -*/ -int32 float64_to_int32_round_to_zero( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits64 aSig, savedASig; - int32 z; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - shiftCount = 0x433 - aExp; - if ( shiftCount < 21 ) { - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - goto invalid; - } - else if ( 52 < shiftCount ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig |= LIT64( 0x0010000000000000 ); - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_exception_flags |= float_flag_invalid; - return aSign ? 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_exception_flags |= float_flag_invalid; - return aSign ? 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<> 22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0. 
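The predicate that follows works purely on the encoding: shifting the sign bit out leaves the 11 exponent bits at the top of the word, so any value strictly greater than 0xFFE0000000000000 has an all-ones exponent and a non-zero fraction, which is exactly a NaN; an infinity compares equal rather than greater. A few spot checks, written as a throwaway snippet in which is_nan64() simply restates the patch's float64_is_nan():

#include <assert.h>

typedef unsigned long long bits64;

/* Restates float64_is_nan() from the patch for a quick sanity check. */
static int is_nan64(bits64 a)
{
	return 0xFFE0000000000000ULL < (bits64)(a << 1);
}

int main(void)
{
	assert(!is_nan64(0x7FF0000000000000ULL));	/* +infinity */
	assert(!is_nan64(0xFFF0000000000000ULL));	/* -infinity */
	assert(is_nan64(0x7FF8000000000000ULL));	/* quiet NaN */
	assert(is_nan64(0xFFF0000000000001ULL));	/* signalling NaN, sign set */
	assert(!is_nan64(0x4000000000000000ULL));	/* 2.0 */
	return 0;
}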
+------------------------------------------------------------------------------- +*/ +static inline flag float64_is_nan(float64 a) +{ + return (LIT64(0xFFE0000000000000) < (bits64)(a << 1)); +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +#define extractFloat32Frac(a) (bits32)(((float32)(a)) & 0x007FFFFF) + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +#define extractFloat32Exp(a) (int16)((((float32)(a)) >> 23) & 0xFF) + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +#define extractFloat32Sign(a) (((float32)(a)) >> 31) + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +single-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +static inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) +{ + return (((bits32)zSign) << 31) + (((bits32)zExp) << 23) + zSig; +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +static inline bits64 extractFloat64Frac(float64 a) +{ + return a & LIT64(0x000FFFFFFFFFFFFF); +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +static inline int16 extractFloat64Exp(float64 a) +{ + return (a >> 52) & 0x7FF; +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +static inline flag extractFloat64Sign(float64 a) +{ + return a >> 63; +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +double-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. 
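The "one less" convention spelled out above is easiest to see in single precision, where the hidden integer bit is 0x00800000: packing a complete, normalized significand lets that bit carry into the exponent field, so the same value can be produced either way. A throwaway check follows, with pack32() merely restating the patch's packFloat32() arithmetic.

#include <assert.h>

typedef int flag;
typedef short int16;
typedef unsigned int bits32, float32;

/* Restates packFloat32() from the patch. */
static float32 pack32(flag zSign, int16 zExp, bits32 zSig)
{
	return (((bits32)zSign) << 31) + (((bits32)zExp) << 23) + zSig;
}

int main(void)
{
	/* 1.0f from a bare fraction field: biased exponent 0x7F goes in as-is. */
	assert(pack32(0, 0x7F, 0x000000) == 0x3F800000);

	/* 1.0f from a complete significand: the hidden bit 0x00800000 carries
	 * into the exponent field, so zExp is supplied as 0x7E, one less. */
	assert(pack32(0, 0x7E, 0x00800000) == 0x3F800000);
	return 0;
}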
Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +static inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) { - return a >> 31; + return (((bits64)zSign) << 63) + (((bits64)zExp) << 52) + zSig; } static inline flag float32_eq_nocheck(float32 a, float32 b) @@ -252,11 +363,6 @@ return (a != b) && (aSign ^ (a < b)); } -static inline flag extractFloat64Sign(float64 a) -{ - return a >> 63; -} - static inline flag float64_eq_nocheck(float64 a, float64 b) { return (a == b) || ((bits64) ((a | b) << 1) == 0); diff -Nru linux_v18c/arch/arm/nwfpe/uaccsess_nwfpe.S linux/arch/arm/nwfpe/uaccsess_nwfpe.S --- linux_v18c/arch/arm/nwfpe/uaccsess_nwfpe.S 1970-01-01 09:00:00.000000000 +0900 +++ linux/arch/arm/nwfpe/uaccsess_nwfpe.S 2006-04-08 15:54:31.000000000 +0900 @@ -0,0 +1,27 @@ +#include +#include + + .global __get_user_nwfpe +__get_user_nwfpe: +1: ldrt r1, [r0] + mov r0, #0 + mov pc, lr + +__get_user_nwfpe_bad: + mov r1, #0 + mov r0, #-EFAULT + mov pc, lr + + .global __put_user_nwfpe +__put_user_nwfpe: +2: strt r1, [r0] + mov r0, #0 + mov pc, lr + +__put_user_nwfpe_bad: + mov r0, #-EFAULT + mov pc, lr + +.section __ex_table, "a" + .long 1b, __get_user_nwfpe_bad + .long 2b, __put_user_nwfpe_bad
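For context on the new uaccsess_nwfpe.S helpers: each `.long 1b, __get_user_nwfpe_bad` / `.long 2b, __put_user_nwfpe_bad` pair in __ex_table tells the ARM fault handler that, should the ldrt/strt at label 1/2 fault on a bad user address, execution resumes at the matching *_bad stub, which hands back -EFAULT (and a zeroed value for the load) instead of oopsing. This is the same mechanism the generic __get_user/__put_user macros use. An illustrative open-coded equivalent is sketched below; the wrapper name and the .fixup-based layout are mine, not the patch's, which routes faults straight to the dedicated *_bad stubs and returns the loaded word in r1.

#include <linux/errno.h>

/* Illustrative only, not the patch's interface: load one word from a user
 * pointer, letting a fault be redirected through __ex_table to the fixup
 * code, which reports -EFAULT instead of letting the kernel oops. */
static inline int nwfpe_load_user_word(unsigned int *val, const unsigned int *addr)
{
	int err = 0;
	unsigned int tmp;

	__asm__ __volatile__(
	"1:	ldrt	%1, [%2]\n"
	"2:\n"
	"	.section .fixup, \"ax\"\n"
	"3:	mov	%0, %3\n"
	"	mov	%1, #0\n"
	"	b	2b\n"
	"	.previous\n"
	"	.section __ex_table, \"a\"\n"
	"	.long	1b, 3b\n"
	"	.previous\n"
	: "+r" (err), "=&r" (tmp)
	: "r" (addr), "i" (-EFAULT));

	*val = tmp;
	return err;
}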