From 91f2876f143dd8b2304490c3e85f96aaaa5ac219 Mon Sep 17 00:00:00 2001
From: Ivailo Monev <xakepa10@laimg.moc>
Date: Tue, 9 Aug 2016 20:19:41 +0000
Subject: [PATCH] get rid of some SIMD remains

Signed-off-by: Ivailo Monev <xakepa10@laimg.moc>
---
 src/core/tools/qsimd.cpp         | 415 ---------------------------------------
 src/core/tools/qsimd_p.h         | 237 ----------------------
 src/core/tools/qstring.cpp       | 126 ------------
 src/core/tools/tools.cmake       |   2 -
 src/gui/image/qimage.cpp         |   1 -
 src/gui/image/qjpeghandler.cpp   |   1 -
 src/gui/image/qpixmap_raster.cpp |  20 +-
 src/gui/painting/qdrawhelper.cpp | 251 -----------------------
 src/gui/painting/qdrawhelper_p.h |   1 -
 9 files changed, 6 insertions(+), 1048 deletions(-)
 delete mode 100644 src/core/tools/qsimd.cpp
 delete mode 100644 src/core/tools/qsimd_p.h
diff --git a/src/core/tools/qsimd.cpp b/src/core/tools/qsimd.cpp
deleted file mode 100644
index 2b8045094..000000000
--- a/src/core/tools/qsimd.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2015 The Qt Company Ltd.
-** Contact: http://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see http://www.qt.io/terms-conditions. For further
-** information use the contact form at http://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 2.1 or version 3 as published by the Free
-** Software Foundation and appearing in the file LICENSE.LGPLv21 and
-** LICENSE.LGPLv3 included in the packaging of this file. Please review the
-** following information to ensure the GNU Lesser General Public License
-** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
-** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
-**
-** As a special exception, The Qt Company gives you certain additional
-** rights. These rights are described in The Qt Company LGPL Exception
-** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3.0 as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL included in the
-** packaging of this file.  Please review the following information to
-** ensure the GNU General Public License version 3.0 requirements will be
-** met: http://www.gnu.org/copyleft/gpl.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include "qsimd_p.h"
-#include <QByteArray>
-#include <stdio.h>
-
-#if defined(Q_OS_WINCE)
-#include <windows.h>
-#endif
-
-#if defined(Q_OS_WIN64)
-#include <intrin.h>
-#endif
-
-#if defined(Q_OS_LINUX) && defined(__arm__)
-#include "qcore_unix_p.h"
-
-// the kernel header definitions for HWCAP_*
-// (the ones we need/may need anyway)
-
-// copied from <asm/hwcap.h> (ARM)
-#define HWCAP_IWMMXT    512
-#define HWCAP_CRUNCH    1024
-#define HWCAP_THUMBEE   2048
-#define HWCAP_NEON      4096
-#define HWCAP_VFPv3     8192
-#define HWCAP_VFPv3D16  16384
-
-// copied from <linux/auxvec.h>
-#define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
-
-#endif
-
-QT_BEGIN_NAMESPACE
-
-#if defined (Q_OS_NACL)
-static inline uint detectProcessorFeatures()
-{
-    return 0;
-}
-#elif defined (Q_OS_WINCE)
-static inline uint detectProcessorFeatures()
-{
-    uint features = 0;
-
-#if defined (ARM)
-    if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
-        features = IWMMXT;
-        return features;
-    }
-#elif defined(_X86_)
-    features = 0;
-#if defined QT_HAVE_MMX
-    if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
-        features |= MMX;
-#endif
-#if defined QT_HAVE_3DNOW
-    if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
-        features |= MMX3DNOW;
-#endif
-    return features;
-#endif
-    features = 0;
-    return features;
-}
-
-#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
-static inline uint detectProcessorFeatures()
-{
-    uint features = 0;
-
-#if defined(Q_OS_LINUX)
-    int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
-    if (auxv != -1) {
-        unsigned long vector[64];
-        int nread;
-        while (features == 0) {
-            nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
-            if (nread <= 0) {
-                // EOF or error
-                break;
-            }
-
-            int max = nread / (sizeof vector[0]);
-            for (int i = 0; i < max; i += 2)
-                if (vector[i] == AT_HWCAP) {
-                    if (vector[i+1] & HWCAP_IWMMXT)
-                        features |= IWMMXT;
-                    if (vector[i+1] & HWCAP_NEON)
-                        features |= NEON;
-                    break;
-                }
-        }
-
-        ::qt_safe_close(auxv);
-        return features;
-    }
-    // fall back if /proc/self/auxv wasn't found
-#endif
-
-#if defined(QT_HAVE_IWMMXT)
-    // runtime detection only available when running as a previlegied process
-    features = IWMMXT;
-#elif defined(QT_ALWAYS_HAVE_NEON)
-    features = NEON;
-#endif
-
-    return features;
-}
-
-#elif defined(__i386__) || defined(_M_IX86)
-static inline uint detectProcessorFeatures()
-{
-    uint features = 0;
-
-    unsigned int extended_result = 0;
-    unsigned int feature_result = 0;
-    uint result = 0;
-    /* see p. 118 of amd64 instruction set manual Vol3 */
-#if defined(Q_CC_GNU)
-    long cpuid_supported, tmp1;
-    asm ("pushf\n"
-         "pop %0\n"
-         "mov %0, %1\n"
-         "xor $0x00200000, %0\n"
-         "push %0\n"
-         "popf\n"
-         "pushf\n"
-         "pop %0\n"
-         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
-         : "=a" (cpuid_supported), "=r" (tmp1)
-         );
-    if (cpuid_supported) {
-        asm ("xchg %%ebx, %2\n"
-             "cpuid\n"
-             "xchg %%ebx, %2\n"
-            : "=c" (feature_result), "=d" (result), "=&r" (tmp1)
-            : "a" (1));
-
-        asm ("xchg %%ebx, %1\n"
-             "cpuid\n"
-             "cmp $0x80000000, %%eax\n"
-             "jnbe 1f\n"
-             "xor %0, %0\n"
-             "jmp 2f\n"
-             "1:\n"
-             "mov $0x80000001, %%eax\n"
-             "cpuid\n"
-             "2:\n"
-             "xchg %%ebx, %1\n"
-            : "=d" (extended_result), "=&r" (tmp1)
-            : "a" (0x80000000)
-            : "%ecx"
-            );
-    }
-
-#elif defined (Q_OS_WIN)
-    _asm {
-        push eax
-        push ebx
-        push ecx
-        push edx
-        pushfd
-        pop eax
-        mov ebx, eax
-        xor eax, 00200000h
-        push eax
-        popfd
-        pushfd
-        pop eax
-        mov edx, 0
-        xor eax, ebx
-        jz skip
-
-        mov eax, 1
-        cpuid
-        mov result, edx
-        mov feature_result, ecx
-    skip:
-        pop edx
-        pop ecx
-        pop ebx
-        pop eax
-    }
-
-    _asm {
-        push eax
-        push ebx
-        push ecx
-        push edx
-        pushfd
-        pop eax
-        mov ebx, eax
-        xor eax, 00200000h
-        push eax
-        popfd
-        pushfd
-        pop eax
-        mov edx, 0
-        xor eax, ebx
-        jz skip2
-
-        mov eax, 80000000h
-        cpuid
-        cmp eax, 80000000h
-        jbe skip2
-        mov eax, 80000001h
-        cpuid
-        mov extended_result, edx
-    skip2:
-        pop edx
-        pop ecx
-        pop ebx
-        pop eax
-    }
-#endif
-
-
-    // result now contains the standard feature bits
-    if (result & (1u << 15))
-        features |= CMOV;
-    if (result & (1u << 23))
-        features |= MMX;
-    if (extended_result & (1u << 22))
-        features |= MMXEXT;
-    if (extended_result & (1u << 31))
-        features |= MMX3DNOW;
-    if (extended_result & (1u << 30))
-        features |= MMX3DNOWEXT;
-    if (result & (1u << 25))
-        features |= SSE;
-    if (result & (1u << 26))
-        features |= SSE2;
-    if (feature_result & (1u))
-        features |= SSE3;
-    if (feature_result & (1u << 9))
-        features |= SSSE3;
-    if (feature_result & (1u << 19))
-        features |= SSE4_1;
-    if (feature_result & (1u << 20))
-        features |= SSE4_2;
-    if (feature_result & (1u << 28))
-        features |= AVX;
-
-    return features;
-}
-
-#elif defined(__x86_64) || defined(Q_OS_WIN64)
-static inline uint detectProcessorFeatures()
-{
-    uint features = MMX|SSE|SSE2|CMOV;
-    uint feature_result = 0;
-
-#if defined(Q_CC_GNU)
-    quint64 tmp;
-    asm ("xchg %%rbx, %1\n"
-         "cpuid\n"
-         "xchg %%rbx, %1\n"
-        : "=c" (feature_result), "=&r" (tmp)
-        : "a" (1)
-        : "%edx"
-        );
-#elif defined (Q_OS_WIN64)
-    {
-       int info[4];
-       __cpuid(info, 1);
-       feature_result = info[2];
-    }
-#endif
-
-    if (feature_result & (1u))
-        features |= SSE3;
-    if (feature_result & (1u << 9))
-        features |= SSSE3;
-    if (feature_result & (1u << 19))
-        features |= SSE4_1;
-    if (feature_result & (1u << 20))
-        features |= SSE4_2;
-    if (feature_result & (1u << 28))
-        features |= AVX;
-
-    return features;
-}
-
-#elif defined(__ia64__)
-static inline uint detectProcessorFeatures()
-{
-    return MMX|SSE|SSE2;
-}
-
-#else
-static inline uint detectProcessorFeatures()
-{
-    return 0;
-}
-#endif
-
-/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below.
- * Here's the data (don't forget the ONE leading space):
- mmx
- mmxext
- mmx3dnow
- mmx3dnowext
- sse
- sse2
- cmov
- iwmmxt
- neon
- sse3
- ssse3
- sse4.1
- sse4.2
- avx
-  */
-
-// begin generated
-static const char features_string[] =
-    " mmx\0"
-    " mmxext\0"
-    " mmx3dnow\0"
-    " mmx3dnowext\0"
-    " sse\0"
-    " sse2\0"
-    " cmov\0"
-    " iwmmxt\0"
-    " neon\0"
-    " sse3\0"
-    " ssse3\0"
-    " sse4.1\0"
-    " sse4.2\0"
-    " avx\0"
-    "\0";
-
-static const int features_indices[] = {
-       0,    5,   13,   23,   36,   41,   47,   53,
-      61,   67,   73,   80,   88,   96,   -1
-};
-// end generated
-
-const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
-
-uint qDetectCPUFeatures()
-{
-    static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
-    if (features != -1)
-        return features;
-
-    uint f = detectProcessorFeatures();
-    QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
-    if (disable == "all") {
-        f = 0;
-    } else if (!disable.isEmpty()) {
-        disable.prepend(' ');
-        for (int i = 0; i < features_count; ++i) {
-            if (disable.contains(features_string + features_indices[i]))
-                f &= ~(1 << i);
-        }
-    }
-
-    features = f;
-    return features;
-}
-
-void qDumpCPUFeatures()
-{
-    uint features = qDetectCPUFeatures();
-    printf("Processor features: ");
-    for (int i = 0; i < features_count; ++i) {
-        if (features & (1 << i))
-            printf("%s", features_string + features_indices[i]);
-    }
-    puts("");
-}
-
-QT_END_NAMESPACE
diff --git a/src/core/tools/qsimd_p.h b/src/core/tools/qsimd_p.h
deleted file mode 100644
index fbc34c108..000000000
--- a/src/core/tools/qsimd_p.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2015 The Qt Company Ltd.
-** Contact: http://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see http://www.qt.io/terms-conditions. For further
-** information use the contact form at http://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 2.1 or version 3 as published by the Free
-** Software Foundation and appearing in the file LICENSE.LGPLv21 and
-** LICENSE.LGPLv3 included in the packaging of this file. Please review the
-** following information to ensure the GNU Lesser General Public License
-** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
-** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
-**
-** As a special exception, The Qt Company gives you certain additional
-** rights. These rights are described in The Qt Company LGPL Exception
-** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3.0 as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL included in the
-** packaging of this file.  Please review the following information to
-** ensure the GNU General Public License version 3.0 requirements will be
-** met: http://www.gnu.org/copyleft/gpl.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#ifndef QSIMD_P_H
-#define QSIMD_P_H
-
-#include <qglobal.h>
-
-
-QT_BEGIN_HEADER
-
-
-#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__)))
-// Disable MMX and SSE on Mac/PPC builds, or if the compiler
-// does not support -Xarch argument passing
-#undef QT_HAVE_SSE
-#undef QT_HAVE_SSE2
-#undef QT_HAVE_SSE3
-#undef QT_HAVE_SSSE3
-#undef QT_HAVE_SSE4_1
-#undef QT_HAVE_SSE4_2
-#undef QT_HAVE_AVX
-#undef QT_HAVE_3DNOW
-#undef QT_HAVE_MMX
-#endif
-
-// SSE intrinsics
-#if defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
-#if defined(QT_LINUXBASE)
-/// this is an evil hack - the posix_memalign declaration in LSB
-/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
-#  define posix_memalign _lsb_hack_posix_memalign
-#  include <emmintrin.h>
-#  undef posix_memalign
-#else
-#  ifdef Q_CC_MINGW
-#    include <windows.h>
-#  endif
-#  include <emmintrin.h>
-#endif
-
-// SSE3 intrinsics
-#if defined(QT_HAVE_SSE3) && (defined(__SSE3__) || defined(Q_CC_MSVC))
-#include <pmmintrin.h>
-#endif
-
-// SSSE3 intrinsics
-#if defined(QT_HAVE_SSSE3) && (defined(__SSSE3__) || defined(Q_CC_MSVC))
-#include <tmmintrin.h>
-#endif
-
-// SSE4.1 intrinsics
-#if defined(QT_HAVE_SSE4_1) && (defined(__SSE4_1__) || defined(Q_CC_MSVC))
-#include <smmintrin.h>
-#endif
-
-// SSE4.2 intrinsics
-#if defined(QT_HAVE_SSE4_2) && (defined(__SSE4_2__) || defined(Q_CC_MSVC))
-#include <nmmintrin.h>
-
-// Add missing intrisics in some compilers (e.g. llvm-gcc)
-#ifndef _SIDD_UBYTE_OPS
-#define _SIDD_UBYTE_OPS                 0x00
-#endif
-
-#ifndef _SIDD_UWORD_OPS
-#define _SIDD_UWORD_OPS                 0x01
-#endif
-
-#ifndef _SIDD_SBYTE_OPS
-#define _SIDD_SBYTE_OPS                 0x02
-#endif
-
-#ifndef _SIDD_SWORD_OPS
-#define _SIDD_SWORD_OPS                 0x03
-#endif
-
-#ifndef _SIDD_CMP_EQUAL_ANY
-#define _SIDD_CMP_EQUAL_ANY             0x00
-#endif
-
-#ifndef _SIDD_CMP_RANGES
-#define _SIDD_CMP_RANGES                0x04
-#endif
-
-#ifndef _SIDD_CMP_EQUAL_EACH
-#define _SIDD_CMP_EQUAL_EACH            0x08
-#endif
-
-#ifndef _SIDD_CMP_EQUAL_ORDERED
-#define _SIDD_CMP_EQUAL_ORDERED         0x0c
-#endif
-
-#ifndef _SIDD_POSITIVE_POLARITY
-#define _SIDD_POSITIVE_POLARITY         0x00
-#endif
-
-#ifndef _SIDD_NEGATIVE_POLARITY
-#define _SIDD_NEGATIVE_POLARITY         0x10
-#endif
-
-#ifndef _SIDD_MASKED_POSITIVE_POLARITY
-#define _SIDD_MASKED_POSITIVE_POLARITY  0x20
-#endif
-
-#ifndef _SIDD_MASKED_NEGATIVE_POLARITY
-#define _SIDD_MASKED_NEGATIVE_POLARITY  0x30
-#endif
-
-#ifndef _SIDD_LEAST_SIGNIFICANT
-#define _SIDD_LEAST_SIGNIFICANT         0x00
-#endif
-
-#ifndef _SIDD_MOST_SIGNIFICANT
-#define _SIDD_MOST_SIGNIFICANT          0x40
-#endif
-
-#ifndef _SIDD_BIT_MASK
-#define _SIDD_BIT_MASK                  0x00
-#endif
-
-#ifndef _SIDD_UNIT_MASK
-#define _SIDD_UNIT_MASK                 0x40
-#endif
-
-#endif
-
-// AVX intrinsics
-#if defined(QT_HAVE_AVX) && (defined(__AVX__) || defined(Q_CC_MSVC))
-#include <immintrin.h>
-#endif
-
-
-#if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2))
-#define QT_ALWAYS_HAVE_SSE2
-#endif
-#endif // defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
-
-// NEON intrinsics
-#if defined __ARM_NEON__
-#define QT_ALWAYS_HAVE_NEON
-#include <arm_neon.h>
-#endif
-
-
-// IWMMXT intrinsics
-#if defined(QT_HAVE_IWMMXT)
-#include <mmintrin.h>
-#if defined(Q_OS_WINCE)
-#  include "qplatformdefs.h"
-#endif
-#endif
-
-#if defined(QT_HAVE_IWMMXT)
-#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
-#  include <xmmintrin.h>
-#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
-#  pragma warning(disable: 4391)
-#  include <xmmintrin.h>
-#endif
-#endif
-
-// 3D now intrinsics
-#if defined(QT_HAVE_3DNOW) && (defined(__3dNOW__) || defined(Q_CC_MSVC))
-#include <mm3dnow.h>
-#endif
-
-QT_BEGIN_NAMESPACE
-
-
-enum CPUFeatures {
-    None        = 0,
-    MMX         = 0x1,
-    MMXEXT      = 0x2,
-    MMX3DNOW    = 0x4,
-    MMX3DNOWEXT = 0x8,
-    SSE         = 0x10,
-    SSE2        = 0x20,
-    CMOV        = 0x40,
-    IWMMXT      = 0x80,
-    NEON        = 0x100,
-    SSE3        = 0x200,
-    SSSE3       = 0x400,
-    SSE4_1      = 0x800,
-    SSE4_2      = 0x1000,
-    AVX         = 0x2000
-};
-
-Q_CORE_EXPORT uint qDetectCPUFeatures();
-
-
-#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
-    for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
-
-QT_END_NAMESPACE
-
-QT_END_HEADER
-
-#endif // QSIMD_P_H
diff --git a/src/core/tools/qstring.cpp b/src/core/tools/qstring.cpp
index e74cae81a..a81acfb99 100644
--- a/src/core/tools/qstring.cpp
+++ b/src/core/tools/qstring.cpp
@@ -46,7 +46,6 @@
 #include <qtextcodec.h>
 #endif
 #include <qutfcodec_p.h>
-#include "qsimd_p.h"
 #include <qdatastream.h>
 #include <qlist.h>
 #include "qlocale.h"
@@ -3535,61 +3534,6 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const
     Use toLocal8Bit() instead.
 */
 
-#if defined(QT_ALWAYS_HAVE_SSE2)
-static inline __m128i mergeQuestionMarks(__m128i chunk)
-{
-    const __m128i questionMark = _mm_set1_epi16('?');
-
-# ifdef __SSE4_2__
-    // compare the unsigned shorts for the range 0x0100-0xFFFF
-    // note on the use of _mm_cmpestrm:
-    //  The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
-    //  says for range search the following:
-    //    For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
-    //
-    //  However, all examples on the Internet, including from Intel
-    //  (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
-    //  put the range to be searched first
-    //
-    //  Disassembly and instruction-level debugging with GCC and ICC show
-    //  that they are doing the right thing. Inverting the arguments in the
-    //  instruction does cause a bunch of test failures.
-
-    const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
-    const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
-    const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
-
-    // replace the non-Latin 1 characters in the chunk with question marks
-    chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-# else
-    // SSE has no compare instruction for unsigned comparison.
-    // The variables must be shiffted + 0x8000 to be compared
-    const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
-    const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
-
-    const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
-    const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
-
-#  ifdef __SSE4_1__
-    // replace the non-Latin 1 characters in the chunk with question marks
-    chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-#  else
-    // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
-    // the 16 bits that were correct contains zeros
-    const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
-
-    // correctBytes contains the bytes that were in limit
-    // the 16 bits that were off limits contains zeros
-    const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
-
-    // merge offLimitQuestionMark and correctBytes to have the result
-    chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
-#  endif
-# endif
-    return chunk;
-}
-#endif
-
 static QByteArray toLatin1_helper(const QChar *data, int length)
 {
     QByteArray ba;
@@ -3597,51 +3541,6 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
         ba.resize(length);
         const ushort *src = reinterpret_cast<const ushort *>(data);
         uchar *dst = (uchar*) ba.data();
-#if defined(QT_ALWAYS_HAVE_SSE2)
-        if (length >= 16) {
-            const int chunkCount = length >> 4; // divided by 16
-
-            for (int i = 0; i < chunkCount; ++i) {
-                __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
-                chunk1 = mergeQuestionMarks(chunk1);
-                src += 8;
-
-                __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
-                chunk2 = mergeQuestionMarks(chunk2);
-                src += 8;
-
-                // pack the two vector to 16 x 8bits elements
-                const __m128i result = _mm_packus_epi16(chunk1, chunk2);
-
-                _mm_storeu_si128((__m128i*)dst, result); // store
-                dst += 16;
-            }
-            length = length % 16;
-        }
-#elif defined(QT_ALWAYS_HAVE_NEON)
-        // Refer to the documentation of the SSE2 implementation
-        // this use eactly the same method as for SSE except:
-        // 1) neon has unsigned comparison
-        // 2) packing is done to 64 bits (8 x 8bits component).
-        if (length >= 16) {
-            const int chunkCount = length >> 3; // divided by 8
-            const uint16x8_t questionMark = vdupq_n_u16('?'); // set
-            const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
-            for (int i = 0; i < chunkCount; ++i) {
-                uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
-                src += 8;
-
-                const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
-                const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
-                const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
-                chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
-                const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
-                vst1_u8(dst, result); // store
-                dst += 8;
-            }
-            length = length % 8;
-        }
-#endif
         while (length--) {
             *dst++ = (*src>0xff) ? '?' : (uchar) *src;
             ++src;
@@ -3783,31 +3682,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
         d->data = d->array;
         d->array[size] = '\0';
         ushort *dst = d->data;
-        /* SIMD:
-         * Unpacking with SSE has been shown to improve performance on recent CPUs
-         * The same method gives no improvement with NEON.
-         */
-#if defined(QT_ALWAYS_HAVE_SSE2)
-        if (size >= 16) {
-            int chunkCount = size >> 4; // divided by 16
-            const __m128i nullMask = _mm_set1_epi32(0);
-            for (int i = 0; i < chunkCount; ++i) {
-                const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
-                str += 16;
-
-                // unpack the first 8 bytes, padding with zeros
-                const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
-                _mm_storeu_si128((__m128i*)dst, firstHalf); // store
-                dst += 8;
-
-                // unpack the last 8 bytes, padding with zeros
-                const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
-                _mm_storeu_si128((__m128i*)dst, secondHalf); // store
-                dst += 8;
-            }
-            size = size % 16;
-        }
-#endif
         while (size--)
             *dst++ = (uchar)*str++;
     }
diff --git a/src/core/tools/tools.cmake b/src/core/tools/tools.cmake
index 0b0335e64..bfc4c8d9a 100644
--- a/src/core/tools/tools.cmake
+++ b/src/core/tools/tools.cmake
@@ -37,7 +37,6 @@ set(CORE_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer_impl.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qset.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd_p.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qstack.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.h
@@ -80,7 +79,6 @@ set(CORE_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qregexp.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qshareddata.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/qstringbuilder.cpp
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 001e5e5d7..2da71df69 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -57,7 +57,6 @@
 #include <qdrawhelper_p.h>
 #include <qmemrotate_p.h>
 #include <qpixmapdata_p.h>
-#include <qsimd_p.h>
 
 #include <qhash.h>
 
diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp
index cda6c0df2..a7f97896a 100644
--- a/src/gui/image/qjpeghandler.cpp
+++ b/src/gui/image/qjpeghandler.cpp
@@ -45,7 +45,6 @@
 #include <qvariant.h>
 #include <qvector.h>
 #include <qbuffer.h>
-#include <qsimd_p.h>
 
 #include <stdio.h>      // jpeglib needs this to be pre-included
 #include <setjmp.h>
diff --git a/src/gui/image/qpixmap_raster.cpp b/src/gui/image/qpixmap_raster.cpp
index e2c1db911..8e5c5b114 100644
--- a/src/gui/image/qpixmap_raster.cpp
+++ b/src/gui/image/qpixmap_raster.cpp
@@ -40,22 +40,18 @@
 ****************************************************************************/
 
 #include "qpixmap.h"
-
-#include <qfont_p.h>
-
+#include "qfont_p.h"
 #include "qpixmap_raster_p.h"
 #include "qnativeimage_p.h"
 #include "qimage_p.h"
 #include "qpaintengine.h"
-
 #include "qbitmap.h"
 #include "qimage.h"
-#include <QBuffer>
-#include <QImageReader>
-#include <qimage_p.h>
-#include <qsimd_p.h>
-#include <qwidget_p.h>
-#include <qdrawhelper_p.h>
+#include "qbuffer.h"
+#include "qimagereader.h"
+#include "qimage_p.h"
+#include "qwidget_p.h"
+#include "qdrawhelper_p.h"
 
 QT_BEGIN_NAMESPACE
 
@@ -163,7 +159,6 @@ void QRasterPixmapData::fill(const QColor &color)
         if (alpha != 255) {
             if (!image.hasAlphaChannel()) {
                 QImage::Format toFormat;
-#if !(defined(QT_HAVE_NEON) || defined(QT_ALWAYS_HAVE_SSE2))
                 if (image.format() == QImage::Format_RGB16)
                     toFormat = QImage::Format_ARGB8565_Premultiplied;
                 else if (image.format() == QImage::Format_RGB666)
@@ -173,7 +168,6 @@ void QRasterPixmapData::fill(const QColor &color)
                 else if (image.format() == QImage::Format_RGB444)
                     toFormat = QImage::Format_ARGB4444_Premultiplied;
                 else
-#endif
                     toFormat = QImage::Format_ARGB32_Premultiplied;
 
                 if (!image.isNull() && qt_depthForFormat(image.format()) == qt_depthForFormat(toFormat)) {
@@ -364,7 +358,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
             QImage::Format opaqueFormat = QNativeImage::systemFormat();
             QImage::Format alphaFormat = QImage::Format_ARGB32_Premultiplied;
 
-#if !defined(QT_HAVE_NEON) && !defined(QT_ALWAYS_HAVE_SSE2)
             switch (opaqueFormat) {
             case QImage::Format_RGB16:
                 alphaFormat = QImage::Format_ARGB8565_Premultiplied;
@@ -372,7 +365,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
             default: // We don't care about the others...
                 break;
             }
-#endif
 
             if (!sourceImage.hasAlphaChannel()) {
                 format = opaqueFormat;
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index e54a45588..9c33cc019 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -625,76 +625,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i
     return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00);
 }
 
-#if defined(QT_ALWAYS_HAVE_SSE2)
-#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b)  \
-{ \
-    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
-    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
-    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
-    const __m128i idxidy =  _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
-    const __m128i dxidy =  _mm_sub_epi16(distx_, dxdy); \
-    const __m128i idxdy =  _mm_sub_epi16(disty_, dxdy); \
- \
-    __m128i tlAG = _mm_srli_epi16(tl, 8); \
-    __m128i tlRB = _mm_and_si128(tl, colorMask); \
-    __m128i trAG = _mm_srli_epi16(tr, 8); \
-    __m128i trRB = _mm_and_si128(tr, colorMask); \
-    __m128i blAG = _mm_srli_epi16(bl, 8); \
-    __m128i blRB = _mm_and_si128(bl, colorMask); \
-    __m128i brAG = _mm_srli_epi16(br, 8); \
-    __m128i brRB = _mm_and_si128(br, colorMask); \
- \
-    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
-    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
-    trAG = _mm_mullo_epi16(trAG, dxidy); \
-    trRB = _mm_mullo_epi16(trRB, dxidy); \
-    blAG = _mm_mullo_epi16(blAG, idxdy); \
-    blRB = _mm_mullo_epi16(blRB, idxdy); \
-    brAG = _mm_mullo_epi16(brAG, dxdy); \
-    brRB = _mm_mullo_epi16(brRB, dxdy); \
- \
-    /* Add the values, and shift to only keep 8 significant bits per colors */ \
-    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
-    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
-    rAG = _mm_andnot_si128(colorMask, rAG); \
-    rRB = _mm_srli_epi16(rRB, 8); \
-    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
-}
-#endif
-
-#if defined(QT_ALWAYS_HAVE_NEON)
-#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b)  \
-{ \
-    const int16x8_t dxdy = vmulq_s16(distx, disty); \
-    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
-    const int16x8_t idxidy =  vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
-    const int16x8_t dxidy =  vsubq_s16(distx_, dxdy); \
-    const int16x8_t idxdy =  vsubq_s16(disty_, dxdy); \
- \
-    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
-    int16x8_t tlRB = vandq_s16(tl, colorMask); \
-    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
-    int16x8_t trRB = vandq_s16(tr, colorMask); \
-    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
-    int16x8_t blRB = vandq_s16(bl, colorMask); \
-    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
-    int16x8_t brRB = vandq_s16(br, colorMask); \
- \
-    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
-    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
-    rAG = vmlaq_s16(rAG, trAG, dxidy); \
-    rRB = vmlaq_s16(rRB, trRB, dxidy); \
-    rAG = vmlaq_s16(rAG, blAG, idxdy); \
-    rRB = vmlaq_s16(rRB, blRB, idxdy); \
-    rAG = vmlaq_s16(rAG, brAG, dxdy); \
-    rRB = vmlaq_s16(rRB, brRB, dxdy); \
- \
-    rAG = vandq_s16(invColorMask, rAG); \
-    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
-    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
-}
-#endif
-
 template<TextureBlendType blendType>
 Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2)
 {
@@ -801,70 +731,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
                     }
                 }
 
-                if (blendType != BlendTransformedBilinearTiled &&
-                        (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
-#if defined(QT_ALWAYS_HAVE_SSE2)
-                    const __m128i disty_ = _mm_set1_epi16(disty);
-                    const __m128i idisty_ = _mm_set1_epi16(idisty);
-                    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-
-                    lim -= 3;
-                    for (; f < lim; x += 4, f += 4) {
-                        // Load 4 pixels from s1, and split the alpha-green and red-blue component
-                        __m128i top = _mm_loadu_si128((__m128i*)((const uint *)(s1)+x));
-                        __m128i topAG = _mm_srli_epi16(top, 8);
-                        __m128i topRB = _mm_and_si128(top, colorMask);
-                        // Multiplies each colour component by idisty
-                        topAG = _mm_mullo_epi16 (topAG, idisty_);
-                        topRB = _mm_mullo_epi16 (topRB, idisty_);
-
-                        // Same for the s2 vector
-                        __m128i bottom = _mm_loadu_si128((__m128i*)((const uint *)(s2)+x));
-                        __m128i bottomAG = _mm_srli_epi16(bottom, 8);
-                        __m128i bottomRB = _mm_and_si128(bottom, colorMask);
-                        bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
-                        bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
-
-                        // Add the values, and shift to only keep 8 significant bits per colors
-                        __m128i rAG =_mm_add_epi16(topAG, bottomAG);
-                        rAG = _mm_srli_epi16(rAG, 8);
-                        _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
-                        __m128i rRB =_mm_add_epi16(topRB, bottomRB);
-                        rRB = _mm_srli_epi16(rRB, 8);
-                        _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
-                    }
-#elif defined(QT_ALWAYS_HAVE_NEON)
-                    const int16x8_t disty_ = vdupq_n_s16(disty);
-                    const int16x8_t idisty_ = vdupq_n_s16(idisty);
-                    const int16x8_t colorMask = vdupq_n_s16(0x00ff);
-
-                    lim -= 3;
-                    for (; f < lim; x += 4, f += 4) {
-                        // Load 4 pixels from s1, and split the alpha-green and red-blue component
-                        int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
-                        int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
-                        int16x8_t topRB = vandq_s16(top, colorMask);
-                        // Multiplies each colour component by idisty
-                        topAG = vmulq_s16(topAG, idisty_);
-                        topRB = vmulq_s16(topRB, idisty_);
-
-                        // Same for the s2 vector
-                        int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
-                        int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
-                        int16x8_t bottomRB = vandq_s16(bottom, colorMask);
-                        bottomAG = vmulq_s16(bottomAG, disty_);
-                        bottomRB = vmulq_s16(bottomRB, disty_);
-
-                        // Add the values, and shift to only keep 8 significant bits per colors
-                        int16x8_t rAG = vaddq_s16(topAG, bottomAG);
-                        rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
-                        vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
-                        int16x8_t rRB = vaddq_s16(topRB, bottomRB);
-                        rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
-                        vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
-                    }
-#endif
-                }
                 for (; f < count; f++) { // Same as above but without sse2
                     if (blendType == BlendTransformedBilinearTiled) {
                         if (x >= image_width) x -= image_width;
@@ -931,123 +797,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
                 const uchar *s2 = data->texture.scanLine(y2);
                 int disty = (fy & 0x0000ffff) >> 12;
 
-                if (blendType != BlendTransformedBilinearTiled &&
-                    (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
-
-#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
-                    while (b < end) { \
-                        int x1 = (fx >> 16); \
-                        int x2; \
-                        fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
-                        if (x1 != x2) \
-                            break; \
-                        uint tl = fetch(s1, x1, data->texture.colorTable); \
-                        uint tr = fetch(s1, x2, data->texture.colorTable); \
-                        uint bl = fetch(s2, x1, data->texture.colorTable); \
-                        uint br = fetch(s2, x2, data->texture.colorTable); \
-                        int distx = (fx & 0x0000ffff) >> 12; \
-                        *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
-                        fx += fdx; \
-                        ++b; \
-                    } \
-                    uint *boundedEnd; \
-                    if (fdx > 0) \
-                        boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); \
-                    else \
-                        boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); \
-                    boundedEnd -= 3;
-
-#if defined(QT_ALWAYS_HAVE_SSE2)
-                    BILINEAR_DOWNSCALE_BOUNDS_PROLOG
-
-                    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-                    const __m128i v_256 = _mm_set1_epi16(256);
-                    const __m128i v_disty = _mm_set1_epi16(disty);
-                    __m128i v_fdx = _mm_set1_epi32(fdx*4);
-
-                    ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
-
-                    union Vect_buffer { __m128i vect; quint32 i[4]; };
-                    Vect_buffer v_fx;
-
-                    for (int i = 0; i < 4; i++) {
-                        v_fx.i[i] = fx;
-                        fx += fdx;
-                    }
-
-                    while (b < boundedEnd) {
-
-                        Vect_buffer tl, tr, bl, br;
-
-                        for (int i = 0; i < 4; i++) {
-                            int x1 = v_fx.i[i] >> 16;
-                            const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
-                            const uint *addr_tr = addr_tl + 1;
-                            tl.i[i] = *addr_tl;
-                            tr.i[i] = *addr_tr;
-                            bl.i[i] = *(addr_tl+secondLine);
-                            br.i[i] = *(addr_tr+secondLine);
-                        }
-                        __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12);
-                        v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
-                        v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
-
-                        interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b);
-                        b+=4;
-                        v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx);
-                    }
-                    fx = v_fx.i[0];
-#elif defined(QT_ALWAYS_HAVE_NEON)
-                    BILINEAR_DOWNSCALE_BOUNDS_PROLOG
-
-                    const int16x8_t colorMask = vdupq_n_s16(0x00ff);
-                    const int16x8_t invColorMask = vmvnq_s16(colorMask);
-                    const int16x8_t v_256 = vdupq_n_s16(256);
-                    const int16x8_t v_disty = vdupq_n_s16(disty);
-                    const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
-                    int32x4_t v_fdx = vdupq_n_s32(fdx*4);
-
-                    ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
-
-                    union Vect_buffer { int32x4_t vect; quint32 i[4]; };
-                    Vect_buffer v_fx;
-
-                    for (int i = 0; i < 4; i++) {
-                        v_fx.i[i] = fx;
-                        fx += fdx;
-                    }
-
-                    const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
-
-                    while (b < boundedEnd) {
-
-                        Vect_buffer tl, tr, bl, br;
-
-                        Vect_buffer v_fx_shifted;
-                        v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
-
-                        int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
-
-                        for (int i = 0; i < 4; i++) {
-                            int x1 = v_fx_shifted.i[i];
-                            const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
-                            const uint *addr_tr = addr_tl + 1;
-                            tl.i[i] = *addr_tl;
-                            tr.i[i] = *addr_tr;
-                            bl.i[i] = *(addr_tl+secondLine);
-                            br.i[i] = *(addr_tr+secondLine);
-                        }
-
-                        v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
-
-                        interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
-                        b+=4;
-                        v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
-                    }
-                    fx = v_fx.i[0];
-#endif
-                }
-
                 while (b < end) {
                     int x1 = (fx >> 16);
                     int x2;
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index c758ba55f..9c535dca2 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -62,7 +62,6 @@
 #define QT_FT_END_HEADER
 #endif
 #include "qrasterdefs_p.h"
-#include <qsimd_p.h>
 #include <qmath_p.h>
 
 QT_BEGIN_NAMESPACE
-- 
2.11.0