From 1f84894efaa1f703f85c704b02b15ea540983c09 Mon Sep 17 00:00:00 2001 From: Paul Brook Date: Mon, 2 Nov 2009 13:43:59 +0000 Subject: [PATCH] 2009-11-02 Paul Brook ld/testsuite/ * ld-arm/arm-elf.exp: Add new attr-merge-vfp tests. * ld-arm/attr-merge-vfp-1.d: New test. * ld-arm/attr-merge-vfp-1r.d: New test. * ld-arm/attr-merge-vfp-2.d: New test. * ld-arm/attr-merge-vfp-2r.d: New test. * ld-arm/attr-merge-vfp-3.d: New test. * ld-arm/attr-merge-vfp-3r.d: New test. * ld-arm/attr-merge-vfp-4.d: New test. * ld-arm/attr-merge-vfp-4r.d: New test. * ld-arm/attr-merge-vfp-5.d: New test. * ld-arm/attr-merge-vfp-5r.d: New test. * ld-arm/attr-merge-vfp-2.s: New test. * ld-arm/attr-merge-vfp-3.s: New test. * ld-arm/attr-merge-vfp-3-d16.s: New test. * ld-arm/attr-merge-vfp-4.s: New test. * ld-arm/attr-merge-vfp-4-d16.s: New test. gas/ * doc/c-arm.texi: Document new -mfpu options. * config/tc-arm.c (fpu_vfp_ext_v3xd, fpu_vfp_fp16, fpu_neon_ext_fma, fpu_vfp_ext_fma): New. (NEON_ENC_TAB): Add vfma, vfms, vfnma and vfnms. (do_vfp_nsyn_fma_fms, do_neon_fmac): New functions. (insns): Move double precision load/store. Split out double precision VFPv3 instructions. Add VFPv4 instructions. (arm_fpus): Add VFPv3-FP16, VFPv3xD and VFPv4 variants. (aeabi_set_public_attributes): Set VFPv4 variants. gas/testsuite/ * gas/arm/attr-mfpu-vfpv4.d: New test. * gas/arm/attr-mfpu-vfpv4-d16.d: New test. * gas/arm/neon-fma-cov.d: New test. * gas/arm/neon-fma-cov.s: New test. * gas/arm/vfp-fma-inc.s: New test. * gas/arm/vfp-fma-arm.d: New test. * gas/arm/vfp-fma-arm.s: New test. * gas/arm/vfp-fma-thumb.d: New test. * gas/arm/vfp-fma-thumb.s: New test. * gas/arm/vfma1.d: New test. * gas/arm/vfma1.s: New test. * gas/arm/vfpv3xd.d: New test. * gas/arm/vfpv3xd.s: New test. include/opcode/ * arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA, FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define. 
(FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD, FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16, FPU_ARCH_NEON_VFP_V4): Define. binutils/ * readelf.c (arm_attr_tag_VFP_arch): Add VFPv4 and VFPv4-D16. bfd/ * elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4 attributes. opcodes/ * arm-dis.c (coprocessor_opcodes): Update to use new feature flags. Add VFPv4 instructions. --- bfd/ChangeLog | 5 +++++ bfd/elf32-arm.c | 52 ++++++++++++++++++++++++++++++++++++++++-------- include/opcode/ChangeLog | 8 ++++++++ include/opcode/arm.h | 28 ++++++++++++++++++++------ opcodes/ChangeLog | 5 +++++ opcodes/arm-dis.c | 28 +++++++++++++++++++------- 6 files changed, 105 insertions(+), 21 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 52265cb3db..ce48a24ec6 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,8 @@ +2009-11-02 Paul Brook + + * elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4 + attributes. + 2009-11-02 Alan Modra * elflink.c (elf_link_add_object_symbols): Don't force debug diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c index b449ee8333..caa3bd2006 100644 --- a/bfd/elf32-arm.c +++ b/bfd/elf32-arm.c @@ -9730,8 +9730,6 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) /* Some tags have 0 = don't care, 1 = strong requirement, 2 = weak requirement. */ static const int order_021[3] = {0, 2, 1}; - /* For use with Tag_VFP_arch. */ - static const int order_01243[5] = {0, 1, 2, 4, 3}; int i; bfd_boolean result = TRUE; @@ -9923,12 +9921,50 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) } break; case Tag_VFP_arch: - /* Use the "greatest" from the sequence 0, 1, 2, 4, 3, or the - largest value if greater than 4 (for future-proofing). 
*/ - if ((in_attr[i].i > 4 && in_attr[i].i > out_attr[i].i) - || (in_attr[i].i <= 4 && out_attr[i].i <= 4 - && order_01243[in_attr[i].i] > order_01243[out_attr[i].i])) - out_attr[i].i = in_attr[i].i; + { + static const struct + { + int ver; + int regs; + } vfp_versions[7] = + { + {0, 0}, + {1, 16}, + {2, 16}, + {3, 32}, + {3, 16}, + {4, 32}, + {4, 16} + }; + int ver; + int regs; + int newval; + + /* Values greater than 6 aren't defined, so just pick the + biggest */ + if (in_attr[i].i > 6 && in_attr[i].i > out_attr[i].i) + { + out_attr[i] = in_attr[i]; + break; + } + /* The output uses the superset of input features + (ISA version) and registers. */ + ver = vfp_versions[in_attr[i].i].ver; + if (ver < vfp_versions[out_attr[i].i].ver) + ver = vfp_versions[out_attr[i].i].ver; + regs = vfp_versions[in_attr[i].i].regs; + if (regs < vfp_versions[out_attr[i].i].regs) + regs = vfp_versions[out_attr[i].i].regs; + /* This assumes all possible supersets are also a valid + options. */ + for (newval = 6; newval > 0; newval--) + { + if (regs == vfp_versions[newval].regs + && ver == vfp_versions[newval].ver) + break; + } + out_attr[i].i = newval; + } break; case Tag_PCS_config: if (out_attr[i].i == 0) diff --git a/include/opcode/ChangeLog b/include/opcode/ChangeLog index ec2bf97dda..dacf3ee1c7 100644 --- a/include/opcode/ChangeLog +++ b/include/opcode/ChangeLog @@ -1,3 +1,11 @@ +2009-11-02 Paul Brook + + * arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA, + FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define. + (FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD, + FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16, + FPU_ARCH_NEON_VFP_V4): Define. + 2009-10-23 Doug Evans * cgen-bitset.h: Delete, moved to ../cgen/bitset.h. 
diff --git a/include/opcode/arm.h b/include/opcode/arm.h index a639a8b041..459a80338b 100644 --- a/include/opcode/arm.h +++ b/include/opcode/arm.h @@ -62,10 +62,13 @@ #define FPU_VFP_EXT_V1xD 0x08000000 /* Base VFP instruction set. */ #define FPU_VFP_EXT_V1 0x04000000 /* Double-precision insns. */ #define FPU_VFP_EXT_V2 0x02000000 /* ARM10E VFPr1. */ -#define FPU_VFP_EXT_V3 0x01000000 /* VFPv3 insns. */ -#define FPU_NEON_EXT_V1 0x00800000 /* Neon (SIMD) insns. */ -#define FPU_VFP_EXT_D32 0x00400000 /* Registers D16-D31. */ -#define FPU_NEON_FP16 0x00200000 /* Half-precision extensions. */ +#define FPU_VFP_EXT_V3xD 0x01000000 /* VFPv3 single-precision. */ +#define FPU_VFP_EXT_V3 0x00800000 /* VFPv3 double-precision. */ +#define FPU_NEON_EXT_V1 0x00400000 /* Neon (SIMD) insns. */ +#define FPU_VFP_EXT_D32 0x00200000 /* Registers D16-D31. */ +#define FPU_VFP_EXT_FP16 0x00100000 /* Half-precision extensions. */ +#define FPU_NEON_EXT_FMA 0x00080000 /* Neon fused multiply-add */ +#define FPU_VFP_EXT_FMA 0x00040000 /* VFP fused multiply-add */ /* Architectures are the sum of the base and extensions. 
The ARM ARM (rev E) defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T, @@ -120,9 +123,13 @@ #define FPU_VFP_V1xD (FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE) #define FPU_VFP_V1 (FPU_VFP_V1xD | FPU_VFP_EXT_V1) #define FPU_VFP_V2 (FPU_VFP_V1 | FPU_VFP_EXT_V2) -#define FPU_VFP_V3D16 (FPU_VFP_V2 | FPU_VFP_EXT_V3) +#define FPU_VFP_V3D16 (FPU_VFP_V2 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_V3) #define FPU_VFP_V3 (FPU_VFP_V3D16 | FPU_VFP_EXT_D32) +#define FPU_VFP_V3xD (FPU_VFP_V1xD | FPU_VFP_EXT_V2 | FPU_VFP_EXT_V3xD) +#define FPU_VFP_V4D16 (FPU_VFP_V3D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA) +#define FPU_VFP_V4 (FPU_VFP_V3 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA) #define FPU_VFP_HARD (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \ + | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_FMA | FPU_NEON_EXT_FMA \ | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_D32) #define FPU_FPA (FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2) @@ -136,13 +143,22 @@ #define FPU_ARCH_VFP_V1 ARM_FEATURE (0, FPU_VFP_V1) #define FPU_ARCH_VFP_V2 ARM_FEATURE (0, FPU_VFP_V2) #define FPU_ARCH_VFP_V3D16 ARM_FEATURE (0, FPU_VFP_V3D16) +#define FPU_ARCH_VFP_V3D16_FP16 \ + ARM_FEATURE (0, FPU_VFP_V3D16 | FPU_VFP_EXT_FP16) #define FPU_ARCH_VFP_V3 ARM_FEATURE (0, FPU_VFP_V3) +#define FPU_ARCH_VFP_V3_FP16 ARM_FEATURE (0, FPU_VFP_V3 | FPU_VFP_EXT_FP16) +#define FPU_ARCH_VFP_V3xD ARM_FEATURE (0, FPU_VFP_V3xD) +#define FPU_ARCH_VFP_V3xD_FP16 ARM_FEATURE (0, FPU_VFP_V3xD | FPU_VFP_EXT_FP16) #define FPU_ARCH_NEON_V1 ARM_FEATURE (0, FPU_NEON_EXT_V1) #define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \ ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1) #define FPU_ARCH_NEON_FP16 \ - ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_NEON_FP16) + ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_FP16) #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD) +#define FPU_ARCH_VFP_V4 ARM_FEATURE(0, FPU_VFP_V4) +#define FPU_ARCH_VFP_V4D16 ARM_FEATURE(0, FPU_VFP_V4D16) +#define FPU_ARCH_NEON_VFP_V4 \ + ARM_FEATURE(0, FPU_VFP_V4 | FPU_NEON_EXT_V1 | 
FPU_NEON_EXT_FMA) #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE) diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index 4814ba3fea..c8f9195d91 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,8 @@ +2009-11-02 Paul Brook + + * arm-dis.c (coprocessor_opcodes): Update to use new feature flags. + Add VFPv4 instructions. + 2009-10-29 Sebastian Pop * i386-dis.c (OP_VEX_FMA): Removed. diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c index 7551249904..1616fed92d 100644 --- a/opcodes/arm-dis.c +++ b/opcodes/arm-dis.c @@ -289,8 +289,8 @@ static const struct opcode32 coprocessor_opcodes[] = {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"}, {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"}, /* Half-precision conversion instructions. */ - {FPU_NEON_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"}, - {FPU_NEON_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"}, + {FPU_VFP_EXT_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"}, + {FPU_VFP_EXT_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"}, /* Floating point coprocessor (VFP) instructions. 
*/ {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "vmsr%c\tfpsid, %12-15r"}, @@ -330,14 +330,14 @@ static const struct opcode32 coprocessor_opcodes[] = {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0f50, "vcvt%c.f64.%7?su32\t%z1, %y0"}, {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "vcmp%7'e%c.f32\t%y1, %y0"}, {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "vcmp%7'e%c.f64\t%z1, %z0"}, - {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"}, + {FPU_VFP_EXT_V3xD, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"}, {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "vcvt%c.f64.%16?us%7?31%7?26\t%z1, %z1, #%5,0-3k"}, {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f32\t%y1, %y0"}, {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f64\t%y1, %z0"}, - {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"}, + {FPU_VFP_EXT_V3xD, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"}, {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f64\t%z1, %z1, #%5,0-3k"}, {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "vmov%c\t%12-15r, %16-19r, %z0"}, - {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"}, + {FPU_VFP_EXT_V3xD, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"}, {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "vmov%c.f64\t%z1, #%0-3,16-19d"}, {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "vmov%c\t%y4, %12-15r, %16-19r"}, {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%z0, %12-15r, %16-19r"}, @@ -447,6 +447,16 @@ static const struct opcode32 coprocessor_opcodes[] = {ARM_CEXT_MAVERICK, 0x0e200600, 0x0ff00f10, "cfmadda32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"}, {ARM_CEXT_MAVERICK, 0x0e300600, 0x0ff00f10, "cfmsuba32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"}, + /* VFP Fused multiply add instructions. 
*/ + {FPU_VFP_EXT_FMA, 0x0ea00a00, 0x0fb00f50, "vfma%c.f32\t%y1, %y2, %y0"}, + {FPU_VFP_EXT_FMA, 0x0ea00b00, 0x0fb00f50, "vfma%c.f64\t%z1, %z2, %z0"}, + {FPU_VFP_EXT_FMA, 0x0ea00a40, 0x0fb00f50, "vfms%c.f32\t%y1, %y2, %y0"}, + {FPU_VFP_EXT_FMA, 0x0ea00b40, 0x0fb00f50, "vfms%c.f64\t%z1, %z2, %z0"}, + {FPU_VFP_EXT_FMA, 0x0e900a40, 0x0fb00f50, "vfnma%c.f32\t%y1, %y2, %y0"}, + {FPU_VFP_EXT_FMA, 0x0e900b40, 0x0fb00f50, "vfnma%c.f64\t%z1, %z2, %z0"}, + {FPU_VFP_EXT_FMA, 0x0e900a00, 0x0fb00f50, "vfnms%c.f32\t%y1, %y2, %y0"}, + {FPU_VFP_EXT_FMA, 0x0e900b00, 0x0fb00f50, "vfnms%c.f64\t%z1, %z2, %z0"}, + /* Generic coprocessor instructions. */ { 0, SENTINEL_GENERIC_START, 0, "" }, {ARM_EXT_V5E, 0x0c400000, 0x0ff00000, "mcrr%c\t%8-11d, %4-7d, %12-15r, %16-19r, cr%0-3d"}, @@ -517,8 +527,12 @@ static const struct opcode32 neon_opcodes[] = {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx%c.8\t%12-15,22D, %F, %0-3,5D"}, /* Half-precision conversions. */ - {FPU_NEON_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"}, - {FPU_NEON_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"}, + {FPU_VFP_EXT_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"}, + {FPU_VFP_EXT_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"}, + + /* NEON fused multiply add instructions. */ + {FPU_NEON_EXT_FMA, 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, + {FPU_NEON_EXT_FMA, 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"}, /* Two registers, miscellaneous. */ {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl%c.%24?us8\t%12-15,22Q, %0-3,5D"}, -- 2.11.0