From 1f84894efaa1f703f85c704b02b15ea540983c09 Mon Sep 17 00:00:00 2001
From: Paul Brook <paul@codesourcery.com>
Date: Mon, 2 Nov 2009 13:43:59 +0000
Subject: [PATCH] 2009-11-02  Paul Brook  <paul@codesourcery.com>

	ld/testsuite/
	* ld-arm/arm-elf.exp: Add new attr-merge-vfp tests.
	* ld-arm/attr-merge-vfp-1.d: New test.
	* ld-arm/attr-merge-vfp-1r.d: New test.
	* ld-arm/attr-merge-vfp-2.d: New test.
	* ld-arm/attr-merge-vfp-2r.d: New test.
	* ld-arm/attr-merge-vfp-3.d: New test.
	* ld-arm/attr-merge-vfp-3r.d: New test.
	* ld-arm/attr-merge-vfp-4.d: New test.
	* ld-arm/attr-merge-vfp-4r.d: New test.
	* ld-arm/attr-merge-vfp-5.d: New test.
	* ld-arm/attr-merge-vfp-5r.d: New test.
	* ld-arm/attr-merge-vfp-2.s: New test.
	* ld-arm/attr-merge-vfp-3.s: New test.
	* ld-arm/attr-merge-vfp-3-d16.s: New test.
	* ld-arm/attr-merge-vfp-4.s: New test.
	* ld-arm/attr-merge-vfp-4-d16.s: New test.

	gas/
	* doc/c-arm.texi: Document new -mfpu options.
	* config/tc-arm.c (fpu_vfp_ext_v3xd, fpu_vfp_fp16, fpu_neon_ext_fma,
	fpu_vfp_ext_fma): New.
	(NEON_ENC_TAB): Add vfma, vfms, vfnma and vfnms.
	(do_vfp_nsyn_fma_fms, do_neon_fmac): New functions.
	(insns): Move double precision load/store.  Split out double
	precision VFPv3 instrucitons.  Add VFPv4 instructions.
	(arm_fpus): Add VFPv3-FP16, VFPv3xD and VFPv4 variants.
	(aeabi_set_public_attributes): Set VFPv4 variants

	gas/testsuite/
	* gas/arm/attr-mfpu-vfpv4.d: New test.
	* gas/arm/attr-mfpu-vfpv4-d16.d: New test.
	* gas/arm/neon-fma-cov.d: New test.
	* gas/arm/neon-fma-cov.s: New test.
	* gas/arm/vfp-fma-inc.s: New test.
	* gas/arm/vfp-fma-arm.d: New test.
	* gas/arm/vfp-fma-arm.s: New test.
	* gas/arm/vfp-fma-thumb.d: New test.
	* gas/arm/vfp-fma-thumb.s: New test.
	* gas/arm/vfma1.d: New test.
	* gas/arm/vfma1.s: New test.
	* gas/arm/vfpv3xd.d: New test.
	* gas/arm/vfpv3xd.s: New test.

	include/opcode/
	* arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA,
	FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define.
	(FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD,
	FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16,
	FPU_ARCH_NEON_VFP_V4): Define.

	binutils/
	* readelf.c (arm_attr_tag_VFP_arch): Add VFPv4 and VFPv4-D16.

	bfd/
	* elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4
	attributes.

	opcodes/
	* arm-dis.c (coprocessor_opcodes): Update to use new feature flags.
	Add VFPv4 instructions.
---
 bfd/ChangeLog            |  5 +++++
 bfd/elf32-arm.c          | 52 ++++++++++++++++++++++++++++++++++++++++--------
 include/opcode/ChangeLog |  8 ++++++++
 include/opcode/arm.h     | 28 ++++++++++++++++++++------
 opcodes/ChangeLog        |  5 +++++
 opcodes/arm-dis.c        | 28 +++++++++++++++++++-------
 6 files changed, 105 insertions(+), 21 deletions(-)

diff --git a/bfd/ChangeLog b/bfd/ChangeLog
index 52265cb3db..ce48a24ec6 100644
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@@ -1,3 +1,8 @@
+2009-11-02  Paul Brook  <paul@codesourcery.com>
+
+	* elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4
+	attributes.
+
 2009-11-02  Alan Modra  <amodra@bigpond.net.au>
 
 	* elflink.c (elf_link_add_object_symbols): Don't force debug
diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
index b449ee8333..caa3bd2006 100644
--- a/bfd/elf32-arm.c
+++ b/bfd/elf32-arm.c
@@ -9730,8 +9730,6 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd)
   /* Some tags have 0 = don't care, 1 = strong requirement,
      2 = weak requirement.  */
   static const int order_021[3] = {0, 2, 1};
-  /* For use with Tag_VFP_arch.  */
-  static const int order_01243[5] = {0, 1, 2, 4, 3};
   int i;
   bfd_boolean result = TRUE;
 
@@ -9923,12 +9921,50 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd)
 	    }
 	  break;
 	case Tag_VFP_arch:
-	  /* Use the "greatest" from the sequence 0, 1, 2, 4, 3, or the
-	     largest value if greater than 4 (for future-proofing).  */
-	  if ((in_attr[i].i > 4 && in_attr[i].i > out_attr[i].i)
-	      || (in_attr[i].i <= 4 && out_attr[i].i <= 4
-		  && order_01243[in_attr[i].i] > order_01243[out_attr[i].i]))
-	    out_attr[i].i = in_attr[i].i;
+	    {
+	      static const struct
+	      {
+		  int ver;
+		  int regs;
+	      } vfp_versions[7] =
+		{
+		  {0, 0},
+		  {1, 16},
+		  {2, 16},
+		  {3, 32},
+		  {3, 16},
+		  {4, 32},
+		  {4, 16}
+		};
+	      int ver;
+	      int regs;
+	      int newval;
+
+	      /* Values greater than 6 aren't defined, so just pick the
+	         biggest */
+	      if (in_attr[i].i > 6 && in_attr[i].i > out_attr[i].i)
+		{
+		  out_attr[i] = in_attr[i];
+		  break;
+		}
+	      /* The output uses the superset of input features
+		 (ISA version) and registers.  */
+	      ver = vfp_versions[in_attr[i].i].ver;
+	      if (ver < vfp_versions[out_attr[i].i].ver)
+		ver = vfp_versions[out_attr[i].i].ver;
+	      regs = vfp_versions[in_attr[i].i].regs;
+	      if (regs < vfp_versions[out_attr[i].i].regs)
+		regs = vfp_versions[out_attr[i].i].regs;
+	      /* This assumes all possible supersets are also a valid
+	         options.  */
+	      for (newval = 6; newval > 0; newval--)
+		{
+		  if (regs == vfp_versions[newval].regs
+		      && ver == vfp_versions[newval].ver)
+		    break;
+		}
+	      out_attr[i].i = newval;
+	    }
 	  break;
 	case Tag_PCS_config:
 	  if (out_attr[i].i == 0)
diff --git a/include/opcode/ChangeLog b/include/opcode/ChangeLog
index ec2bf97dda..dacf3ee1c7 100644
--- a/include/opcode/ChangeLog
+++ b/include/opcode/ChangeLog
@@ -1,3 +1,11 @@
+2009-11-02  Paul Brook  <paul@codesourcery.com>
+
+	* arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA,
+	FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define.
+	(FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD,
+	FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16,
+	FPU_ARCH_NEON_VFP_V4): Define.
+
 2009-10-23  Doug Evans  <dje@sebabeach.org>
 
 	* cgen-bitset.h: Delete, moved to ../cgen/bitset.h.
diff --git a/include/opcode/arm.h b/include/opcode/arm.h
index a639a8b041..459a80338b 100644
--- a/include/opcode/arm.h
+++ b/include/opcode/arm.h
@@ -62,10 +62,13 @@
 #define FPU_VFP_EXT_V1xD 0x08000000	/* Base VFP instruction set.  */
 #define FPU_VFP_EXT_V1	 0x04000000	/* Double-precision insns.    */
 #define FPU_VFP_EXT_V2	 0x02000000	/* ARM10E VFPr1.	      */
-#define FPU_VFP_EXT_V3	 0x01000000	/* VFPv3 insns.	              */
-#define FPU_NEON_EXT_V1	 0x00800000	/* Neon (SIMD) insns.	      */
-#define FPU_VFP_EXT_D32  0x00400000	/* Registers D16-D31.	      */
-#define FPU_NEON_FP16	 0x00200000	/* Half-precision extensions. */
+#define FPU_VFP_EXT_V3xD 0x01000000	/* VFPv3 single-precision.    */
+#define FPU_VFP_EXT_V3	 0x00800000	/* VFPv3 double-precision.    */
+#define FPU_NEON_EXT_V1	 0x00400000	/* Neon (SIMD) insns.	      */
+#define FPU_VFP_EXT_D32  0x00200000	/* Registers D16-D31.	      */
+#define FPU_VFP_EXT_FP16 0x00100000	/* Half-precision extensions. */
+#define FPU_NEON_EXT_FMA 0x00080000	/* Neon fused multiply-add    */
+#define FPU_VFP_EXT_FMA	 0x00040000	/* VFP fused multiply-add     */
 
 /* Architectures are the sum of the base and extensions.  The ARM ARM (rev E)
    defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T,
@@ -120,9 +123,13 @@
 #define FPU_VFP_V1xD	(FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE)
 #define FPU_VFP_V1	(FPU_VFP_V1xD | FPU_VFP_EXT_V1)
 #define FPU_VFP_V2	(FPU_VFP_V1 | FPU_VFP_EXT_V2)
-#define FPU_VFP_V3D16	(FPU_VFP_V2 | FPU_VFP_EXT_V3)
+#define FPU_VFP_V3D16	(FPU_VFP_V2 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_V3)
 #define FPU_VFP_V3	(FPU_VFP_V3D16 | FPU_VFP_EXT_D32)
+#define FPU_VFP_V3xD	(FPU_VFP_V1xD | FPU_VFP_EXT_V2 | FPU_VFP_EXT_V3xD)
+#define FPU_VFP_V4D16	(FPU_VFP_V3D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
+#define FPU_VFP_V4	(FPU_VFP_V3 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
 #define FPU_VFP_HARD	(FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \
+			 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_FMA | FPU_NEON_EXT_FMA \
                          | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_D32)
 #define FPU_FPA		(FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2)
 
@@ -136,13 +143,22 @@
 #define FPU_ARCH_VFP_V1	  ARM_FEATURE (0, FPU_VFP_V1)
 #define FPU_ARCH_VFP_V2	  ARM_FEATURE (0, FPU_VFP_V2)
 #define FPU_ARCH_VFP_V3D16	ARM_FEATURE (0, FPU_VFP_V3D16)
+#define FPU_ARCH_VFP_V3D16_FP16 \
+  ARM_FEATURE (0, FPU_VFP_V3D16 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_V3	  ARM_FEATURE (0, FPU_VFP_V3)
+#define FPU_ARCH_VFP_V3_FP16	ARM_FEATURE (0, FPU_VFP_V3 | FPU_VFP_EXT_FP16)
+#define FPU_ARCH_VFP_V3xD	ARM_FEATURE (0, FPU_VFP_V3xD)
+#define FPU_ARCH_VFP_V3xD_FP16	ARM_FEATURE (0, FPU_VFP_V3xD | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_NEON_V1  ARM_FEATURE (0, FPU_NEON_EXT_V1)
 #define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \
   ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1)
 #define FPU_ARCH_NEON_FP16 \
-  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_NEON_FP16)
+  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD)
+#define FPU_ARCH_VFP_V4 ARM_FEATURE(0, FPU_VFP_V4)
+#define FPU_ARCH_VFP_V4D16 ARM_FEATURE(0, FPU_VFP_V4D16)
+#define FPU_ARCH_NEON_VFP_V4 \
+  ARM_FEATURE(0, FPU_VFP_V4 | FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)
 
 #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE)
 
diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog
index 4814ba3fea..c8f9195d91 100644
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@@ -1,3 +1,8 @@
+2009-11-02  Paul Brook  <paul@codesourcery.com>
+
+	* arm-dis.c (coprocessor_opcodes): Update to use new feature flags.
+	Add VFPv4 instructions.
+
 2009-10-29  Sebastian Pop  <sebastian.pop@amd.com>
 
 	* i386-dis.c (OP_VEX_FMA): Removed.
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index 7551249904..1616fed92d 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -289,8 +289,8 @@ static const struct opcode32 coprocessor_opcodes[] =
   {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
   {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
   /* Half-precision conversion instructions.  */
-  {FPU_NEON_FP16,   0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
-  {FPU_NEON_FP16,   0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
 
   /* Floating point coprocessor (VFP) instructions.  */
   {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "vmsr%c\tfpsid, %12-15r"},
@@ -330,14 +330,14 @@ static const struct opcode32 coprocessor_opcodes[] =
   {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0f50, "vcvt%c.f64.%7?su32\t%z1, %y0"},
   {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "vcmp%7'e%c.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "vcmp%7'e%c.f64\t%z1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "vcvt%c.f64.%16?us%7?31%7?26\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f64\t%y1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f64\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "vmov%c\t%12-15r, %16-19r, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
+  {FPU_VFP_EXT_V3xD, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "vmov%c.f64\t%z1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "vmov%c\t%y4, %12-15r, %16-19r"},
   {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%z0, %12-15r, %16-19r"},
@@ -447,6 +447,16 @@ static const struct opcode32 coprocessor_opcodes[] =
   {ARM_CEXT_MAVERICK, 0x0e200600, 0x0ff00f10, "cfmadda32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
   {ARM_CEXT_MAVERICK, 0x0e300600, 0x0ff00f10, "cfmsuba32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
 
+  /* VFP Fused multiply add instructions.  */
+  {FPU_VFP_EXT_FMA, 0x0ea00a00, 0x0fb00f50, "vfma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b00, 0x0fb00f50, "vfma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00a40, 0x0fb00f50, "vfms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b40, 0x0fb00f50, "vfms%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a40, 0x0fb00f50, "vfnma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b40, 0x0fb00f50, "vfnma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a00, 0x0fb00f50, "vfnms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b00, 0x0fb00f50, "vfnms%c.f64\t%z1, %z2, %z0"},
+
   /* Generic coprocessor instructions.  */
   { 0, SENTINEL_GENERIC_START, 0, "" },
   {ARM_EXT_V5E, 0x0c400000, 0x0ff00000, "mcrr%c\t%8-11d, %4-7d, %12-15r, %16-19r, cr%0-3d"},
@@ -517,8 +527,12 @@ static const struct opcode32 neon_opcodes[] =
   {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx%c.8\t%12-15,22D, %F, %0-3,5D"},
   
   /* Half-precision conversions.  */
-  {FPU_NEON_FP16,   0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
-  {FPU_NEON_FP16,   0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+  {FPU_VFP_EXT_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
+  {FPU_VFP_EXT_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+
+  /* NEON fused multiply add instructions.  */
+  {FPU_NEON_EXT_FMA, 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_FMA, 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
 
   /* Two registers, miscellaneous.  */
   {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl%c.%24?us8\t%12-15,22Q, %0-3,5D"},
-- 
2.11.0