From 9c484617807a339e8dc09d5da0f3add30d9b7dd6 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Wed, 31 Oct 2018 15:40:15 +0000 Subject: [PATCH] Revert "Merge pull request #404 from stuartarchibald/wip/llvm7" This reverts commit ebc4c596ae74d5667790948c56fea41fa1d11885, reversing changes made to a52ca0d05add8677d1b5be7eb1ba7c1e5bab5cc3. --- README.rst | 3 +- .../incremental/setup_conda_environment.cmd | 2 +- .../incremental/setup_conda_environment.sh | 2 +- ...issing-header-for-InstructionCombini.patch | 31 + conda-recipes/D47188-svml-VF.patch | 1936 ----------------- conda-recipes/D47188-svml.patch | 821 +++++++ conda-recipes/llvmdev/meta.yaml | 27 +- conda-recipes/llvmdev_manylinux1/meta.yaml | 13 +- conda-recipes/llvmlite/meta.yaml | 2 +- docs/source/admin-guide/install.rst | 6 +- docs/source/conf.py | 6 +- docs/source/user-guide/ir/index.rst | 2 +- ffi/build.py | 4 +- llvmlite/tests/test_binding.py | 28 +- 14 files changed, 895 insertions(+), 1988 deletions(-) create mode 100644 conda-recipes/0001-Transforms-Add-missing-header-for-InstructionCombini.patch delete mode 100644 conda-recipes/D47188-svml-VF.patch create mode 100644 conda-recipes/D47188-svml.patch diff --git a/README.rst b/README.rst index 4ebf9b731..88afc4513 100644 --- a/README.rst +++ b/README.rst @@ -70,8 +70,7 @@ Historical compatibility table: ================= ======================== llvmlite versions compatible LLVM versions ================= ======================== -0.26.0 - ... 7.0.x -0.23.0 - 0.25.0 6.0.x +0.23.0 - ... 6.0.x 0.21.0 - 0.22.0 5.0.x 0.17.0 - 0.20.0 4.0.x 0.16.0 - 0.17.0 3.9.x diff --git a/buildscripts/incremental/setup_conda_environment.cmd b/buildscripts/incremental/setup_conda_environment.cmd index 9fac4c949..0a62a63b1 100644 --- a/buildscripts/incremental/setup_conda_environment.cmd +++ b/buildscripts/incremental/setup_conda_environment.cmd @@ -17,6 +17,6 @@ conda create -n %CONDA_ENV% -q -y python=%PYTHON% cmake call activate %CONDA_ENV% @rem Install llvmdev -%CONDA_INSTALL% -c numba llvmdev="7.0*" +%CONDA_INSTALL% -c numba llvmdev="6.0*" @rem Install enum34 for Python < 3.4 if %PYTHON% LSS 3.4 (%CONDA_INSTALL% enum34) diff --git a/buildscripts/incremental/setup_conda_environment.sh b/buildscripts/incremental/setup_conda_environment.sh index d933b479c..ddfca7ed8 100755 --- a/buildscripts/incremental/setup_conda_environment.sh +++ b/buildscripts/incremental/setup_conda_environment.sh @@ -25,7 +25,7 @@ source activate $CONDA_ENV set -v # Install llvmdev (separate channel, for now) -$CONDA_INSTALL -c numba llvmdev="7.0*" +$CONDA_INSTALL -c numba llvmdev="6.0*" # Install the compiler toolchain, for osx, bootstrapping needed # which happens in build.sh diff --git a/conda-recipes/0001-Transforms-Add-missing-header-for-InstructionCombini.patch b/conda-recipes/0001-Transforms-Add-missing-header-for-InstructionCombini.patch new file mode 100644 index 000000000..ec5a20983 --- /dev/null +++ b/conda-recipes/0001-Transforms-Add-missing-header-for-InstructionCombini.patch @@ -0,0 +1,31 @@ +From 7c9054610e354340f9474dcd13a927f929912d1d Mon Sep 17 00:00:00 2001 +From: Eugene Zelenko +Date: Tue, 6 Mar 2018 23:06:13 +0000 +Subject: [PATCH] [Transforms] Add missing header for InstructionCombining.cpp, + in order to export LLVMInitializeInstCombine as extern "C". Fixes PR35947. + +Patch by Brenton Bostick. 
+ +Differential revision: https://reviews.llvm.org/D44140 + + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326843 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Transforms/InstCombine/InstructionCombining.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp +index a3b2fe9..7ec7343 100644 +--- a/lib/Transforms/InstCombine/InstructionCombining.cpp ++++ b/lib/Transforms/InstCombine/InstructionCombining.cpp +@@ -34,6 +34,7 @@ + //===----------------------------------------------------------------------===// + + #include "InstCombineInternal.h" ++#include "llvm-c/Initialization.h" + #include "llvm/ADT/APInt.h" + #include "llvm/ADT/ArrayRef.h" + #include "llvm/ADT/DenseMap.h" +-- +1.8.3.1 + diff --git a/conda-recipes/D47188-svml-VF.patch b/conda-recipes/D47188-svml-VF.patch deleted file mode 100644 index 04f337865..000000000 --- a/conda-recipes/D47188-svml-VF.patch +++ /dev/null @@ -1,1936 +0,0 @@ -From bcfc1167bf4dafc776f18296b06b2048372d3cb1 Mon Sep 17 00:00:00 2001 -From: Anton Malakhov -Date: Tue, 25 Sep 2018 11:24:55 -0500 -Subject: [PATCH] Fixes vectorizer and extends SVML support - -This patch is created on top of LLVM 7.0.0 collecting several fixes: - -1. https://reviews.llvm.org/D47188 patch fixes the problem with improper calls -to SVML library as it has non-standard calling conventions. So accordingly it -has SVML calling conventions definitions and code to set CC to the vectorized -calls. As SVML provides several implementations for the math functions we also -took into consideration fast attribute and select more fast implementation in -such case. This work is based on original Matt Masten's work. -Author: Denis Nagorny - -2. implements support to legalize SVML calls by breaking down the illegal -vector call instruction into multiple legal vector call instructions during -code generation. Currently the vectorizer does not check legality of the -generated SVML (or any VECLIB) call instructions, and this -can lead to potential problems even during vector type -legalization. This patch addresses this issue by adding -a legality check during code generation and replaces the -illegal SVML call with corresponding legalized instructions. -(RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-June/124357.html) -Author: Karthik Senthil - -3. Functional merge of the patches above which fixes calling convention - - -diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h -index a3fe8340..2b93099e 100644 ---- a/include/llvm/Analysis/TargetLibraryInfo.h -+++ b/include/llvm/Analysis/TargetLibraryInfo.h -@@ -38,6 +38,12 @@ struct VecDesc { - NumLibFuncs - }; - -+enum SVMLAccuracy { -+ SVML_DEFAULT, -+ SVML_HA, -+ SVML_EP -+}; -+ - /// Implementation of the target library information. - /// - /// This class constructs tables that hold the target library information and -@@ -150,7 +156,8 @@ public: - /// Return true if the function F has a vector equivalent with vectorization - /// factor VF. - bool isFunctionVectorizable(StringRef F, unsigned VF) const { -- return !getVectorizedFunction(F, VF).empty(); -+ bool Ignored; -+ return !getVectorizedFunction(F, VF, Ignored, false).empty(); - } - - /// Return true if the function F has a vector equivalent with any -@@ -159,7 +166,8 @@ public: - - /// Return the name of the equivalent of F, vectorized with factor VF. If no - /// such mapping exists, return the empty string. 
-- StringRef getVectorizedFunction(StringRef F, unsigned VF) const; -+ std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML, -+ bool IsFast) const; - - /// Return true if the function F has a scalar equivalent, and set VF to be - /// the vectorization factor. -@@ -253,8 +261,9 @@ public: - bool isFunctionVectorizable(StringRef F) const { - return Impl->isFunctionVectorizable(F); - } -- StringRef getVectorizedFunction(StringRef F, unsigned VF) const { -- return Impl->getVectorizedFunction(F, VF); -+ std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML, -+ bool IsFast) const { -+ return Impl->getVectorizedFunction(F, VF, FromSVML, IsFast); - } - - /// Tests if the function is both available and a candidate for optimized code -diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt -index 830f3750..dfe25b6d 100644 ---- a/include/llvm/IR/CMakeLists.txt -+++ b/include/llvm/IR/CMakeLists.txt -@@ -5,3 +5,7 @@ set(LLVM_TARGET_DEFINITIONS Intrinsics.td) - tablegen(LLVM IntrinsicEnums.inc -gen-intrinsic-enums) - tablegen(LLVM IntrinsicImpl.inc -gen-intrinsic-impl) - add_public_tablegen_target(intrinsics_gen) -+ -+set(LLVM_TARGET_DEFINITIONS SVML.td) -+tablegen(LLVM SVML.inc -gen-svml) -+add_public_tablegen_target(svml_gen) -diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h -index b9c02d7e..1ec5c9b6 100644 ---- a/include/llvm/IR/CallingConv.h -+++ b/include/llvm/IR/CallingConv.h -@@ -220,6 +220,9 @@ namespace CallingConv { - /// shader if tessellation is in use, or otherwise the vertex shader. - AMDGPU_ES = 96, - -+ /// Intel_SVML - Calling conventions for Intel Short Math Vector Library -+ Intel_SVML = 97, -+ - /// The highest possible calling convention ID. Must be some 2^k - 1. - MaxID = 1023 - }; -diff --git a/include/llvm/IR/SVML.td b/include/llvm/IR/SVML.td -new file mode 100644 -index 00000000..5af71040 ---- /dev/null -+++ b/include/llvm/IR/SVML.td -@@ -0,0 +1,62 @@ -+//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file is used by TableGen to define the different typs of SVML function -+// variants used with -fveclib=SVML. -+// -+//===----------------------------------------------------------------------===// -+ -+class SvmlVariant; -+ -+def sin : SvmlVariant; -+def cos : SvmlVariant; -+def pow : SvmlVariant; -+def exp : SvmlVariant; -+def log : SvmlVariant; -+def acos : SvmlVariant; -+def acosh : SvmlVariant; -+def asin : SvmlVariant; -+def asinh : SvmlVariant; -+def atan2 : SvmlVariant; -+def atan : SvmlVariant; -+def atanh : SvmlVariant; -+def cbrt : SvmlVariant; -+def cdfnorm : SvmlVariant; -+def cdfnorminv : SvmlVariant; -+def cosd : SvmlVariant; -+def cosh : SvmlVariant; -+def erf : SvmlVariant; -+def erfc : SvmlVariant; -+def erfcinv : SvmlVariant; -+def erfinv : SvmlVariant; -+def exp10 : SvmlVariant; -+def exp2 : SvmlVariant; -+def expm1 : SvmlVariant; -+def hypot : SvmlVariant; -+def invsqrt : SvmlVariant; -+def log10 : SvmlVariant; -+def log1p : SvmlVariant; -+def log2 : SvmlVariant; -+def sind : SvmlVariant; -+def sinh : SvmlVariant; -+def sqrt : SvmlVariant; -+def tan : SvmlVariant; -+def tanh : SvmlVariant; -+ -+// TODO: SVML does not currently provide _ha and _ep variants of these fucnctions. 
-+// We should call the default variant of these functions in all cases instead. -+ -+// def nearbyint : SvmlVariant; -+// def logb : SvmlVariant; -+// def floor : SvmlVariant; -+// def fmod : SvmlVariant; -+// def ceil : SvmlVariant; -+// def trunc : SvmlVariant; -+// def rint : SvmlVariant; -+// def round : SvmlVariant; -diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt -index 8e8535ab..5a434bc9 100644 ---- a/lib/Analysis/CMakeLists.txt -+++ b/lib/Analysis/CMakeLists.txt -@@ -93,4 +93,5 @@ add_llvm_library(LLVMAnalysis - - DEPENDS - intrinsics_gen -+ svml_gen - ) -diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp -index 102135fb..aec30e35 100644 ---- a/lib/Analysis/TargetLibraryInfo.cpp -+++ b/lib/Analysis/TargetLibraryInfo.cpp -@@ -50,6 +50,11 @@ static bool hasSinCosPiStret(const Triple &T) { - return true; - } - -+std::string svmlMangle(StringRef FnName, const bool IsFast) { -+ std::string FullName = FnName; -+ return IsFast ? FullName : FullName + "_ha"; -+} -+ - /// Initialize the set of available library functions based on the specified - /// target triple. This should be carefully written so that a missing target - /// triple gets a sane set of defaults. -@@ -1452,109 +1457,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( - } - case SVML: { - const VecDesc VecFuncs[] = { -- {"sin", "__svml_sin2", 2}, -- {"sin", "__svml_sin4", 4}, -- {"sin", "__svml_sin8", 8}, -- -- {"sinf", "__svml_sinf4", 4}, -- {"sinf", "__svml_sinf8", 8}, -- {"sinf", "__svml_sinf16", 16}, -- -- {"llvm.sin.f64", "__svml_sin2", 2}, -- {"llvm.sin.f64", "__svml_sin4", 4}, -- {"llvm.sin.f64", "__svml_sin8", 8}, -- -- {"llvm.sin.f32", "__svml_sinf4", 4}, -- {"llvm.sin.f32", "__svml_sinf8", 8}, -- {"llvm.sin.f32", "__svml_sinf16", 16}, -- -- {"cos", "__svml_cos2", 2}, -- {"cos", "__svml_cos4", 4}, -- {"cos", "__svml_cos8", 8}, -- -- {"cosf", "__svml_cosf4", 4}, -- {"cosf", "__svml_cosf8", 8}, -- {"cosf", "__svml_cosf16", 16}, -- -- {"llvm.cos.f64", "__svml_cos2", 2}, -- {"llvm.cos.f64", "__svml_cos4", 4}, -- {"llvm.cos.f64", "__svml_cos8", 8}, -- -- {"llvm.cos.f32", "__svml_cosf4", 4}, -- {"llvm.cos.f32", "__svml_cosf8", 8}, -- {"llvm.cos.f32", "__svml_cosf16", 16}, -- -- {"pow", "__svml_pow2", 2}, -- {"pow", "__svml_pow4", 4}, -- {"pow", "__svml_pow8", 8}, -- -- {"powf", "__svml_powf4", 4}, -- {"powf", "__svml_powf8", 8}, -- {"powf", "__svml_powf16", 16}, -- -- { "__pow_finite", "__svml_pow2", 2 }, -- { "__pow_finite", "__svml_pow4", 4 }, -- { "__pow_finite", "__svml_pow8", 8 }, -- -- { "__powf_finite", "__svml_powf4", 4 }, -- { "__powf_finite", "__svml_powf8", 8 }, -- { "__powf_finite", "__svml_powf16", 16 }, -- -- {"llvm.pow.f64", "__svml_pow2", 2}, -- {"llvm.pow.f64", "__svml_pow4", 4}, -- {"llvm.pow.f64", "__svml_pow8", 8}, -- -- {"llvm.pow.f32", "__svml_powf4", 4}, -- {"llvm.pow.f32", "__svml_powf8", 8}, -- {"llvm.pow.f32", "__svml_powf16", 16}, -- -- {"exp", "__svml_exp2", 2}, -- {"exp", "__svml_exp4", 4}, -- {"exp", "__svml_exp8", 8}, -- -- {"expf", "__svml_expf4", 4}, -- {"expf", "__svml_expf8", 8}, -- {"expf", "__svml_expf16", 16}, -- -- { "__exp_finite", "__svml_exp2", 2 }, -- { "__exp_finite", "__svml_exp4", 4 }, -- { "__exp_finite", "__svml_exp8", 8 }, -- -- { "__expf_finite", "__svml_expf4", 4 }, -- { "__expf_finite", "__svml_expf8", 8 }, -- { "__expf_finite", "__svml_expf16", 16 }, -- -- {"llvm.exp.f64", "__svml_exp2", 2}, -- {"llvm.exp.f64", "__svml_exp4", 4}, -- {"llvm.exp.f64", "__svml_exp8", 8}, -- -- {"llvm.exp.f32", 
"__svml_expf4", 4}, -- {"llvm.exp.f32", "__svml_expf8", 8}, -- {"llvm.exp.f32", "__svml_expf16", 16}, -- -- {"log", "__svml_log2", 2}, -- {"log", "__svml_log4", 4}, -- {"log", "__svml_log8", 8}, -- -- {"logf", "__svml_logf4", 4}, -- {"logf", "__svml_logf8", 8}, -- {"logf", "__svml_logf16", 16}, -- -- { "__log_finite", "__svml_log2", 2 }, -- { "__log_finite", "__svml_log4", 4 }, -- { "__log_finite", "__svml_log8", 8 }, -- -- { "__logf_finite", "__svml_logf4", 4 }, -- { "__logf_finite", "__svml_logf8", 8 }, -- { "__logf_finite", "__svml_logf16", 16 }, -- -- {"llvm.log.f64", "__svml_log2", 2}, -- {"llvm.log.f64", "__svml_log4", 4}, -- {"llvm.log.f64", "__svml_log8", 8}, -- -- {"llvm.log.f32", "__svml_logf4", 4}, -- {"llvm.log.f32", "__svml_logf8", 8}, -- {"llvm.log.f32", "__svml_logf16", 16}, -+#define GET_SVML_VARIANTS -+#include "llvm/IR/SVML.inc" -+#undef GET_SVML_VARIANTS - }; - addVectorizableFunctions(VecFuncs); - break; -@@ -1575,19 +1480,26 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { - return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; - } - --StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -- unsigned VF) const { -+std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, -+ unsigned VF, -+ bool &FromSVML, -+ bool IsFast) const { -+ FromSVML = ClVectorLibrary == SVML; - F = sanitizeFunctionName(F); - if (F.empty()) - return F; - std::vector::const_iterator I = std::lower_bound( - VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName); - while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { -- if (I->VectorizationFactor == VF) -+ if (I->VectorizationFactor == VF) { -+ if (FromSVML) { -+ return svmlMangle(I->VectorFnName, IsFast); -+ } - return I->VectorFnName; -+ } - ++I; - } -- return StringRef(); -+ return std::string(); - } - - StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, -diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp -index da9855ff..c7182754 100644 ---- a/lib/AsmParser/LLLexer.cpp -+++ b/lib/AsmParser/LLLexer.cpp -@@ -600,6 +600,7 @@ lltok::Kind LLLexer::LexIdentifier() { - KEYWORD(spir_kernel); - KEYWORD(spir_func); - KEYWORD(intel_ocl_bicc); -+ KEYWORD(intel_svmlcc); - KEYWORD(x86_64_sysvcc); - KEYWORD(win64cc); - KEYWORD(x86_regcallcc); -diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp -index 7cf74dd1..0b539e86 100644 ---- a/lib/AsmParser/LLParser.cpp -+++ b/lib/AsmParser/LLParser.cpp -@@ -1843,6 +1843,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { - /// ::= 'ccc' - /// ::= 'fastcc' - /// ::= 'intel_ocl_bicc' -+/// ::= 'intel_svmlcc' - /// ::= 'coldcc' - /// ::= 'x86_stdcallcc' - /// ::= 'x86_fastcallcc' -@@ -1902,6 +1903,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { - case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; - case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; - case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; -+ case lltok::kw_intel_svmlcc: CC = CallingConv::Intel_SVML; break; - case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; - case lltok::kw_win64cc: CC = CallingConv::Win64; break; - case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; -diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h -index 8d8c7e99..3c89b78a 100644 ---- a/lib/AsmParser/LLToken.h -+++ b/lib/AsmParser/LLToken.h -@@ -131,6 +131,7 @@ enum Kind { - kw_fastcc, - kw_coldcc, - kw_intel_ocl_bicc, -+ 
kw_intel_svmlcc, - kw_x86_stdcallcc, - kw_x86_fastcallcc, - kw_x86_thiscallcc, -diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp -index 99a25a72..0b6e6787 100644 ---- a/lib/IR/AsmWriter.cpp -+++ b/lib/IR/AsmWriter.cpp -@@ -360,6 +360,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { - case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; - case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break; - case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; -+ case CallingConv::Intel_SVML: Out << "intel_svmlcc"; break; - case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; - case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; - case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; -diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp -index e5231bb7..58b1f0a4 100644 ---- a/lib/IR/Verifier.cpp -+++ b/lib/IR/Verifier.cpp -@@ -2114,6 +2114,7 @@ void Verifier::visitFunction(const Function &F) { - case CallingConv::Fast: - case CallingConv::Cold: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML: - case CallingConv::PTX_Kernel: - case CallingConv::PTX_Device: - Assert(!F.isVarArg(), "Calling convention does not support varargs or " -diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td -index fcc9a296..d36c3a0f 100644 ---- a/lib/Target/X86/X86CallingConv.td -+++ b/lib/Target/X86/X86CallingConv.td -@@ -476,12 +476,29 @@ def RetCC_X86_64 : CallingConv<[ - CCDelegateTo - ]>; - -+// Intel_SVML return-value convention. -+def RetCC_Intel_SVML : CallingConv<[ -+ // Vector types are returned in XMM0,XMM1 -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0,XMM1]>>, -+ -+ // 256-bit FP vectors -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0,YMM1]>>, -+ -+ // 512-bit FP vectors -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0,ZMM1]>> -+]>; -+ - // This is the return-value convention used for the entire X86 backend. - def RetCC_X86 : CallingConv<[ - - // Check if this is the Intel OpenCL built-ins calling convention - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, - -+ CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo>, -+ - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -983,6 +1000,22 @@ def CC_Intel_OCL_BI : CallingConv<[ - CCDelegateTo - ]>; - -+// X86-64 Intel Short Vector Math Library calling convention. -+def CC_Intel_SVML : CallingConv<[ -+ -+ // The SSE vector arguments are passed in XMM registers. -+ CCIfType<[v4f32, v2f64], -+ CCAssignToReg<[XMM0, XMM1, XMM2]>>, -+ -+ // The 256-bit vector arguments are passed in YMM registers. -+ CCIfType<[v8f32, v4f64], -+ CCAssignToReg<[YMM0, YMM1, YMM2]>>, -+ -+ // The 512-bit vector arguments are passed in ZMM registers. -+ CCIfType<[v16f32, v8f64], -+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>> -+]>; -+ - def CC_X86_32_Intr : CallingConv<[ - CCAssignToStack<4, 4> - ]>; -@@ -1039,6 +1072,7 @@ def CC_X86_64 : CallingConv<[ - // This is the argument convention used for the entire X86 backend. 
- def CC_X86 : CallingConv<[ - CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, -+ CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo>, - CCIfSubtarget<"is64Bit()", CCDelegateTo>, - CCDelegateTo - ]>; -@@ -1147,4 +1181,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP, - (sequence "R%u", 12, 15))>; - def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, - (sequence "XMM%u", 8, 15))>; -- -+ -+// SVML calling convention -+def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>; -+def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML, -+ K4, K5, K6, K7)>; -+ -+def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>; -+ -+def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "XMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 8, 15))>; -+def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "YMM%u", 6, 15))>; -+ -+def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 16, 31), -+ K4, K5, K6, K7)>; -+def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, -+ (sequence "ZMM%u", 6, 21), -+ K4, K5, K6, K7)>; -diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp -index 2820004c..817be44a 100644 ---- a/lib/Target/X86/X86ISelLowering.cpp -+++ b/lib/Target/X86/X86ISelLowering.cpp -@@ -3279,7 +3279,8 @@ SDValue X86TargetLowering::LowerFormalArguments( - // FIXME: Only some x86_32 calling conventions support AVX512. - if (Subtarget.hasAVX512() && - (Is64Bit || (CallConv == CallingConv::X86_VectorCall || -- CallConv == CallingConv::Intel_OCL_BI))) -+ CallConv == CallingConv::Intel_OCL_BI || -+ CallConv == CallingConv::Intel_SVML))) - VecVT = MVT::v16f32; - else if (Subtarget.hasAVX()) - VecVT = MVT::v8f32; -diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp -index 55842a4a..28877c90 100644 ---- a/lib/Target/X86/X86RegisterInfo.cpp -+++ b/lib/Target/X86/X86RegisterInfo.cpp -@@ -311,6 +311,23 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR_64_Intel_OCL_BI_SaveList; - break; - } -+ case CallingConv::Intel_SVML: { -+ if (Is64Bit) { -+ if (HasAVX512) -+ return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_SaveList : -+ CSR_64_Intel_SVML_AVX512_SaveList; -+ if (HasAVX) -+ return IsWin64 ? CSR_Win64_Intel_SVML_AVX_SaveList : -+ CSR_64_Intel_SVML_AVX_SaveList; -+ -+ return IsWin64 ? CSR_Win64_Intel_SVML_SaveList : -+ CSR_64_Intel_SVML_SaveList; -+ } else { // Is32Bit -+ if (HasAVX512) -+ return CSR_32_Intel_SVML_AVX512_SaveList; -+ return CSR_32_Intel_SVML_SaveList; -+ } -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_SaveList; - case CallingConv::X86_RegCall: -@@ -425,6 +442,23 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, - return CSR_64_Intel_OCL_BI_RegMask; - break; - } -+ case CallingConv::Intel_SVML: { -+ if (Is64Bit) { -+ if (HasAVX512) -+ return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_RegMask : -+ CSR_64_Intel_SVML_AVX512_RegMask; -+ if (HasAVX) -+ return IsWin64 ? CSR_Win64_Intel_SVML_AVX_RegMask : -+ CSR_64_Intel_SVML_AVX_RegMask; -+ -+ return IsWin64 ? 
CSR_Win64_Intel_SVML_RegMask : -+ CSR_64_Intel_SVML_RegMask; -+ } else { // Is32Bit -+ if (HasAVX512) -+ return CSR_32_Intel_SVML_AVX512_RegMask; -+ return CSR_32_Intel_SVML_RegMask; -+ } -+ } - case CallingConv::HHVM: - return CSR_64_HHVM_RegMask; - case CallingConv::X86_RegCall: -diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h -index 85e8256a..3208a93d 100644 ---- a/lib/Target/X86/X86Subtarget.h -+++ b/lib/Target/X86/X86Subtarget.h -@@ -769,6 +769,7 @@ public: - case CallingConv::X86_ThisCall: - case CallingConv::X86_VectorCall: - case CallingConv::Intel_OCL_BI: -+ case CallingConv::Intel_SVML: - return isTargetWin64(); - // This convention allows using the Win64 convention on other targets. - case CallingConv::Win64: -diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp -index 1c7d0a63..299b161d 100644 ---- a/lib/Transforms/Vectorize/LoopVectorize.cpp -+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp -@@ -602,6 +602,27 @@ protected: - /// vector of instructions. - void addMetadata(ArrayRef To, Instruction *From); - -+ /// Check legality of given SVML call instruction \p VecCall generated for -+ /// scalar call \p Call. If illegal then the appropriate legal instruction -+ /// is returned. -+ Value *legalizeSVMLCall(CallInst *VecCall, CallInst *Call); -+ -+ /// Returns the legal VF for a call instruction \p CI using TTI information -+ /// and vector type. -+ unsigned getLegalVFForCall(CallInst *CI); -+ -+ /// Partially vectorize a given call \p Call by breaking it down into multiple -+ /// calls of \p LegalCall, decided by the variant VF \p LegalVF. -+ Value *partialVectorizeCall(CallInst *Call, CallInst *LegalCall, -+ unsigned LegalVF); -+ -+ /// Generate shufflevector instruction for a vector value \p V based on the -+ /// current \p Part and a smaller VF \p LegalVF. -+ Value *generateShuffleValue(Value *V, unsigned LegalVF, unsigned Part); -+ -+ /// Combine partially vectorized calls stored in \p CallResults. -+ Value *combinePartialVecCalls(SmallVectorImpl &CallResults); -+ - /// The original loop. - Loop *OrigLoop; - -@@ -4105,6 +4126,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { - } - - Function *VectorF; -+ bool FromSVML = false; - if (UseVectorIntrinsic) { - // Use vector version of the intrinsic. - Type *TysForDecl[] = {CI->getType()}; -@@ -4113,7 +4135,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { - VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); - } else { - // Use vector version of the library call. 
-- StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); -+ bool IsFast = CI->getFastMathFlags().isFast(); -+ std::string VFnName = TLI->getVectorizedFunction(FnName, VF, FromSVML, IsFast); - assert(!VFnName.empty() && "Vector function name is empty."); - VectorF = M->getFunction(VFnName); - if (!VectorF) { -@@ -4132,9 +4155,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { - - if (isa(V)) - V->copyFastMathFlags(CI); -- -- VectorLoopValueMap.setVectorValue(&I, Part, V); -- addMetadata(V, &I); -+ if (FromSVML) -+ V->setCallingConv(CallingConv::Intel_SVML); -+ // Perform legalization of SVML call instruction only if original call -+ // was not Intrinsic -+ if (!isa(CI) && FromSVML) { -+ assert((V->getCalledFunction()->getName()).startswith("__svml")); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector call inst:"; V->dump()); -+ auto *LegalV = cast(legalizeSVMLCall(V, CI)); -+ LLVM_DEBUG(dbgs() << "LV: Completed SVML legalization.\n LegalV: "; -+ LegalV->dump()); -+ VectorLoopValueMap.setVectorValue(&I, Part, LegalV); -+ addMetadata(LegalV, &I); -+ } else { -+ VectorLoopValueMap.setVectorValue(&I, Part, V); -+ addMetadata(V, &I); -+ } - } - - break; -@@ -4163,6 +4199,244 @@ void InnerLoopVectorizer::updateAnalysis() { - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); - } - -+//===----------------------------------------------------------------------===// -+// Implementation of functions for SVML vector call legalization. -+//===----------------------------------------------------------------------===// -+// -+// Unlike other VECLIBs, SVML needs to be used with target-legal -+// vector types. Otherwise, link failures and/or runtime failures -+// will occur. A motivating example could be - -+// -+// double *a; -+// float *b; -+// #pragma clang loop vectorize_width(8) -+// for(i = 0; i < N; ++i) { -+// a[i] = sin(i); // Legal SVML VF must be 4 or below on AVX -+// b[i] = cosf(i); // VF can be 8 on AVX since 8 floats can fit in YMM -+// } -+// -+// Current implementation of vector code generation in LV is -+// driven based on a single VF (in InnerLoopVectorizer::VF). This -+// inhibits the flexibility of adjusting/choosing different VF -+// for different instructions. -+// -+// Due to this limitation it is much more straightforward to -+// first generate the illegal sin8 (svml_sin8 for SVML vector -+// library) call and then legalize it than trying to avoid -+// generating illegal code from the beginning. -+// -+// A solution for this problem is to check legality of the -+// call instruction right after generating it in vectorizer and -+// if it is illegal we split the call arguments and issue multiple -+// calls to match the legal VF. This is demonstrated currently for -+// the SVML vector library calls (non-intrinsic version only). -+// -+// Future directions and extensions: -+// 1) This legalization example shows us that a good direction -+// for the VPlan framework would be to model the vector call -+// instructions in a way that legal VF for each call is chosen -+// correctly within vectorizer and illegal code generation is -+// avoided. -+// 2) This logic can also be extended to general vector functions -+// i.e. legalization OpenMP decalre simd functions. The -+// requirements needed for this will be documented soon. 
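A minimal sketch, in LLVM IR, of the legalization this comment block describes (SSA value names here are illustrative, not taken from the patch). Assuming the sin loop above is vectorized with VF = 8 for doubles on an AVX target, where the widest legal double vector is <4 x double>: the vectorizer first emits the illegal __svml_sin8 call, legalizeSVMLCall then extracts each half of the operand with a shufflevector and issues two legal __svml_sin4_ha calls (the _ha high-accuracy suffix is applied because the scalar call is not marked fast), and combinePartialVecCalls merges the partial results back into an <8 x double> value, as the svml-legal-calls.ll test added below checks:

  ; Illegal call initially generated for VF = 8:
  ;   %r = call intel_svmlcc <8 x double> @__svml_sin8_ha(<8 x double> %v)
  ; After legalization with LegalVF = 4, i.e. NumParts = VF / LegalVF = 2:
  %part0 = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %part1 = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %res0 = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> %part0)
  %res1 = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> %part1)
  %r = shufflevector <4 x double> %res0, <4 x double> %res1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>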
-+ -+Value *InnerLoopVectorizer::legalizeSVMLCall(CallInst *VecCall, -+ CallInst *Call) { -+ unsigned LegalVF = getLegalVFForCall(VecCall); -+ -+ assert(LegalVF > 1 && -+ "Legal VF for SVML call must be greater than 1 to vectorize"); -+ -+ if (LegalVF == VF) -+ return VecCall; -+ else if (LegalVF > VF) -+ // TODO: handle case when we are underfilling vectors -+ return VecCall; -+ -+ // Legal VF for this SVML call is smaller than chosen VF, break it down into -+ // smaller call instructions -+ -+ // Convert args, types and return type to match legal VF -+ SmallVector NewTys; -+ SmallVector NewArgs; -+ Type *NewRetTy = ToVectorTy(Call->getType(), LegalVF); -+ -+ for (Value *ArgOperand : Call->arg_operands()) { -+ Type *Ty = ToVectorTy(ArgOperand->getType(), LegalVF); -+ NewTys.push_back(Ty); -+ NewArgs.push_back(UndefValue::get(Ty)); -+ } -+ -+ // Construct legal vector function -+ Function *F = Call->getCalledFunction(); -+ StringRef FnName = F->getName(); -+ Module *M = Call->getModule(); -+ bool unused = false; -+ std::string LegalVFnName = TLI->getVectorizedFunction(FnName, LegalVF, unused, Call->getFastMathFlags().isFast()); -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVFnName: " << LegalVFnName << " FnName: " << FnName << "\n"); -+ assert(!LegalVFnName.empty() && (LegalVFnName != FnName) && -+ "Could not find legal vector function in TLI."); -+ -+ // Since this is targeting SVML calls specifically, we know the module -+ // will not have a vector version of the call -+ assert(!M->getFunction(LegalVFnName) && -+ "Module has vector version for legal SVML call."); -+ FunctionType *LegalFTy = FunctionType::get(NewRetTy, NewTys, false); -+ Function *LegalVectorF = -+ Function::Create(LegalFTy, Function::ExternalLinkage, LegalVFnName, M); -+ assert(LegalVectorF && "Can't create legal SVML vector function."); -+ LegalVectorF->copyAttributesFrom(F); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalVectorF: "; LegalVectorF->dump()); -+ -+ SmallVector OpBundles; -+ Call->getOperandBundlesAsDefs(OpBundles); -+ CallInst *LegalV = CallInst::Create(LegalVectorF, NewArgs, OpBundles); -+ -+ if (isa(LegalV)) -+ LegalV->copyFastMathFlags(Call); -+ -+ // Set SVML calling conventions -+ LegalV->setCallingConv(CallingConv::Intel_SVML); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalV: "; LegalV->dump()); -+ -+ Value *LegalizedCall = partialVectorizeCall(VecCall, LegalV, LegalVF); -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): LegalizedCall: "; LegalizedCall->dump()); -+ -+ // Remove the illegal call from Builder -+ VecCall->eraseFromParent(); -+ -+ if (LegalV) -+ delete LegalV; -+ -+ return LegalizedCall; -+} -+ -+unsigned InnerLoopVectorizer::getLegalVFForCall(CallInst *CI) { -+ const DataLayout DL = CI->getModule()->getDataLayout(); -+ FunctionType *CallFT = CI->getFunctionType(); -+ // All functions that need legalization should have a vector return type. -+ // This is true for all SVML functions that are currently supported. 
-+ assert(isa(CallFT->getReturnType()) && -+ "Return type of call that needs legalization is not a vector."); -+ auto *VecCallRetType = cast(CallFT->getReturnType()); -+ Type *ElemType = VecCallRetType->getElementType(); -+ -+ unsigned TypeBitWidth = DL.getTypeSizeInBits(ElemType); -+ unsigned VectorBitWidth = TTI->getRegisterBitWidth(true); -+ unsigned LegalVF = VectorBitWidth / TypeBitWidth; -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): Type Bit Width: " << TypeBitWidth << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Current VL: " << VF << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Vector Bit Width: " << VectorBitWidth -+ << "\n"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): Legal Target VL: " << LegalVF << "\n"); -+ -+ return LegalVF; -+} -+ -+// Partial vectorization of a call instruction is achieved by making clones of -+// \p LegalCall and overwriting its argument operands with shufflevector -+// equivalent decided based on \p LegalVF and current Part being filled. -+Value *InnerLoopVectorizer::partialVectorizeCall(CallInst *Call, -+ CallInst *LegalCall, -+ unsigned LegalVF) { -+ unsigned NumParts = VF / LegalVF; -+ LLVM_DEBUG(dbgs() << "LV(SVML): NumParts: " << NumParts << "\n"); -+ SmallVector CallResults; -+ -+ for (unsigned Part = 0; Part < NumParts; ++Part) { -+ auto *ClonedCall = cast(LegalCall->clone()); -+ -+ // Update the arg operand of cloned call to shufflevector -+ for (unsigned i = 0, ie = Call->getNumArgOperands(); i != ie; ++i) { -+ auto *NewOp = generateShuffleValue(Call->getArgOperand(i), LegalVF, Part); -+ ClonedCall->setArgOperand(i, NewOp); -+ } -+ -+ LLVM_DEBUG(dbgs() << "LV(SVML): ClonedCall: "; ClonedCall->dump()); -+ -+ auto *PartialVecCall = Builder.Insert(ClonedCall); -+ CallResults.push_back(PartialVecCall); -+ } -+ -+ return combinePartialVecCalls(CallResults); -+} -+ -+Value *InnerLoopVectorizer::generateShuffleValue(Value *V, unsigned LegalVF, -+ unsigned Part) { -+ // Example: -+ // Consider the following vector code - -+ // %1 = sitofp <4 x i32> %0 to <4 x double> -+ // %2 = call <4 x double> @__svml_sin4(<4 x double> %1) -+ // -+ // If the LegalVF is 2, we partially vectorize the sin4 call by invoking -+ // generateShuffleValue on the operand %1 -+ // If Part = 1, output value is - -+ // %shuffle = shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ // and if Part = 2, output is - -+ // %shuffle7 =shufflevector <4 x double> %1, <4 x double> undef, <2 x i32> -+ -+ assert(isa(V->getType()) && -+ "Cannot generate shuffles for non-vector values."); -+ SmallVector ShuffleMask; -+ Value *Undef = UndefValue::get(V->getType()); -+ -+ unsigned ElemIdx = Part * LegalVF; -+ -+ for (unsigned K = 0; K < LegalVF; K++) -+ ShuffleMask.push_back(ElemIdx + K); -+ -+ auto *ShuffleInst = -+ Builder.CreateShuffleVector(V, Undef, ShuffleMask, "shuffle"); -+ -+ return ShuffleInst; -+} -+ -+// Results of the calls executed by smaller legal call instructions must be -+// combined to match the original VF for later use. This is done by constructing -+// shufflevector instructions in a cumulative fashion. 
-+Value *InnerLoopVectorizer::combinePartialVecCalls( -+ SmallVectorImpl &CallResults) { -+ assert(isa(CallResults[0]->getType()) && -+ "Cannot combine calls with non-vector results."); -+ auto *CallType = cast(CallResults[0]->getType()); -+ -+ Value *CombinedShuffle; -+ unsigned NumElems = CallType->getNumElements() * 2; -+ unsigned NumRegs = CallResults.size(); -+ -+ assert(NumRegs >= 2 && isPowerOf2_32(NumRegs) && -+ "Number of partial vector calls to combine must be a power of 2 " -+ "(atleast 2^1)"); -+ -+ while (NumRegs > 1) { -+ for (unsigned I = 0; I < NumRegs; I += 2) { -+ SmallVector ShuffleMask; -+ for (unsigned J = 0; J < NumElems; J++) -+ ShuffleMask.push_back(J); -+ -+ CombinedShuffle = Builder.CreateShuffleVector( -+ CallResults[I], CallResults[I + 1], ShuffleMask, "combined"); -+ LLVM_DEBUG(dbgs() << "LV(SVML): CombinedShuffle:"; -+ CombinedShuffle->dump()); -+ CallResults.push_back(CombinedShuffle); -+ } -+ -+ SmallVector::iterator Start = CallResults.begin(); -+ SmallVector::iterator End = Start + NumRegs; -+ CallResults.erase(Start, End); -+ -+ NumElems *= 2; -+ NumRegs /= 2; -+ } -+ -+ return CombinedShuffle; -+} -+ - void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { - // We should not collect Scalars more than once per VF. Right now, this - // function is called from collectUniformsAndScalars(), which already does -diff --git a/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/test/Transforms/LoopVectorize/X86/scatter_crash.ll -old mode 100755 -new mode 100644 -diff --git a/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -index 5a4bfe5e..4da2e48a 100644 ---- a/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -+++ b/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll -@@ -39,7 +39,8 @@ for.end: ; preds = %for.body - declare double @__exp_finite(double) #0 - - ; CHECK-LABEL: @exp_f64 --; CHECK: <4 x double> @__svml_exp4 -+; CHECK: <2 x double> @__svml_exp2 -+; CHECK: <2 x double> @__svml_exp2 - ; CHECK: ret - define void @exp_f64(double* nocapture %varray) { - entry: -@@ -99,7 +100,8 @@ for.end: ; preds = %for.body - declare double @__log_finite(double) #0 - - ; CHECK-LABEL: @log_f64 --; CHECK: <4 x double> @__svml_log4 -+; CHECK: <2 x double> @__svml_log2 -+; CHECK: <2 x double> @__svml_log2 - ; CHECK: ret - define void @log_f64(double* nocapture %varray) { - entry: -@@ -159,7 +161,8 @@ for.end: ; preds = %for.body - declare double @__pow_finite(double, double) #0 - - ; CHECK-LABEL: @pow_f64 --; CHECK: <4 x double> @__svml_pow4 -+; CHECK: <2 x double> @__svml_pow2 -+; CHECK: <2 x double> @__svml_pow2 - ; CHECK: ret - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - entry: -diff --git a/test/Transforms/LoopVectorize/X86/svml-calls.ll b/test/Transforms/LoopVectorize/X86/svml-calls.ll -index 8ff62f17..4d48d981 100644 ---- a/test/Transforms/LoopVectorize/X86/svml-calls.ll -+++ b/test/Transforms/LoopVectorize/X86/svml-calls.ll -@@ -31,7 +31,7 @@ declare float @llvm.log.f32(float) #0 - - define void @sin_f64(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -54,7 +54,7 @@ for.end: - - define void @sin_f32(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> 
[[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -77,7 +77,7 @@ for.end: - - define void @sin_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @sin_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -100,7 +100,7 @@ for.end: - - define void @sin_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @sin_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sinf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_sinf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -123,7 +123,7 @@ for.end: - - define void @cos_f64(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -146,7 +146,7 @@ for.end: - - define void @cos_f32(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -169,7 +169,7 @@ for.end: - - define void @cos_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @cos_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_cos4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -192,7 +192,7 @@ for.end: - - define void @cos_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @cos_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_cosf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_cosf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -215,7 +215,7 @@ for.end: - - define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -240,7 +240,7 @@ for.end: - - define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f64_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -265,7 +265,7 @@ for.end: - - define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -290,7 +290,7 @@ for.end: - - define void @pow_f32_intrinsic(float* nocapture 
%varray, float* nocapture readonly %exp) { - ; CHECK-LABEL: @pow_f32_intrinsic( --; CHECK: [[TMP8:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: [[TMP8:%.*]] = call intel_svmlcc <4 x float> @__svml_powf4_ha(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -315,7 +315,7 @@ for.end: - - define void @exp_f64(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -338,7 +338,7 @@ for.end: - - define void @exp_f32(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -361,7 +361,7 @@ for.end: - - define void @exp_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @exp_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -384,7 +384,7 @@ for.end: - - define void @exp_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @exp_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_expf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -407,7 +407,7 @@ for.end: - - define void @log_f64(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -430,7 +430,7 @@ for.end: - - define void @log_f32(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -453,7 +453,7 @@ for.end: - - define void @log_f64_intrinsic(double* nocapture %varray) { - ; CHECK-LABEL: @log_f64_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log4(<4 x double> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -476,7 +476,7 @@ for.end: - - define void @log_f32_intrinsic(float* nocapture %varray) { - ; CHECK-LABEL: @log_f32_intrinsic( --; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_logf4(<4 x float> [[TMP4:%.*]]) -+; CHECK: [[TMP5:%.*]] = call intel_svmlcc <4 x float> @__svml_logf4_ha(<4 x float> [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: -@@ -497,5 +497,44 @@ for.end: - ret void - } - --attributes #0 = { nounwind readnone } -+; CHECK-LABEL: @atan2_finite -+; CHECK: intel_svmlcc <4 x double> @__svml_atan24 -+; CHECK: intel_svmlcc <4 x double> @__svml_atan24 -+; CHECK: ret -+ -+declare double @__atan2_finite(double, double) local_unnamed_addr #0 - -+define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { -+entry: -+ br label %for.cond1.preheader -+ -+for.cond1.preheader: ; preds = %for.inc7, %entry -+ 
%indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] -+ %0 = trunc i64 %indvars.iv19 to i32 -+ %conv = sitofp i32 %0 to double -+ br label %for.body3 -+ -+for.body3: ; preds = %for.body3, %for.cond1.preheader -+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %1 = trunc i64 %indvars.iv.next to i32 -+ %conv4 = sitofp i32 %1 to double -+ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) -+ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv -+ store double %call, double* %arrayidx6, align 8 -+ %exitcond = icmp eq i64 %indvars.iv.next, 100 -+ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 -+ -+for.inc7: ; preds = %for.body3 -+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 -+ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 -+ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader -+ -+for.end9: ; preds = %for.inc7 -+ ret void -+} -+ -+attributes #0 = { nounwind readnone } -+!5 = distinct !{!5, !6, !7} -+!6 = !{!"llvm.loop.vectorize.width", i32 8} -+!7 = !{!"llvm.loop.vectorize.enable", i1 true} -diff --git a/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll b/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -new file mode 100644 -index 00000000..93676abb ---- /dev/null -+++ b/test/Transforms/LoopVectorize/X86/svml-legal-calls.ll -@@ -0,0 +1,508 @@ -+; Check legalization of SVML calls. Also checks that intrinsic calls are not legalizedby vectorizer. -+ -+; RUN: opt -vector-library=SVML -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s -+ -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+declare double @sin(double) #0 -+declare float @sinf(float) #0 -+declare double @llvm.sin.f64(double) #0 -+declare float @llvm.sin.f32(float) #0 -+ -+declare double @cos(double) #0 -+declare float @cosf(float) #0 -+declare double @llvm.cos.f64(double) #0 -+declare float @llvm.cos.f32(float) #0 -+ -+declare double @pow(double, double) #0 -+declare float @powf(float, float) #0 -+declare double @llvm.pow.f64(double, double) #0 -+declare float @llvm.pow.f32(float, float) #0 -+ -+declare double @exp(double) #0 -+declare float @expf(float) #0 -+declare double @llvm.exp.f64(double) #0 -+declare float @llvm.exp.f32(float) #0 -+ -+declare double @log(double) #0 -+declare float @logf(float) #0 -+declare double @llvm.log.f64(double) #0 -+declare float @llvm.log.f32(float) #0 -+ -+ -+define void @sin_f64(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc <4 x double> @__svml_sin4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @sin(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32( -+; CHECK: [[TMP1:%.*]] = 
call intel_svmlcc <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @sinf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @sin_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x double> @__svml_sin8_ha(<8 x double> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.sin.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @sin_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @sin_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_sinf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.sin.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f64(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc <4 x double> @__svml_cos4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @cos(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @cosf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ 
-+define void @cos_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @cos_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x double> @__svml_cos8_ha(<8 x double> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.cos.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @cos_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @cos_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_cosf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.cos.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP2:%.*]], <4 x double> [[TMP3:%.*]]) -+; CHECK: [[TMP4:%.*]] = call intel_svmlcc <4 x double> @__svml_pow4_ha(<4 x double> [[TMP5:%.*]], <4 x double> [[TMP6:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @pow(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x double> @__svml_pow8_ha(<8 x double> [[TMP2:%.*]], <8 x double> [[TMP3:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv -+ %tmp1 = load double, double* %arrayidx, align 4 -+ %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) -+ %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %tmp2, double* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32( -+; CHECK: [[TMP1:%.*]] = 
call intel_svmlcc <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[WIDE_LOAD:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @powf(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) { -+; CHECK-LABEL: @pow_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_powf8_ha(<8 x float> [[TMP2:%.*]], <8 x float> [[TMP3:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv -+ %tmp1 = load float, float* %arrayidx, align 4 -+ %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) -+ %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %tmp2, float* %arrayidx2, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc <4 x double> @__svml_exp4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @exp(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @expf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @exp_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x double> @__svml_exp8_ha(<8 x double> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to 
double -+ %call = tail call double @llvm.exp.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @exp_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @exp_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_expf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @llvm.exp.f32(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP2:%.*]]) -+; CHECK: [[TMP3:%.*]] = call intel_svmlcc <4 x double> @__svml_log4_ha(<4 x double> [[TMP4:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @log(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to float -+ %call = tail call float @logf(float %conv) -+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv -+ store float %call, float* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f64_intrinsic(double* nocapture %varray) { -+; CHECK-LABEL: @log_f64_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x double> @__svml_log8_ha(<8 x double> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %tmp = trunc i64 %iv to i32 -+ %conv = sitofp i32 %tmp to double -+ %call = tail call double @llvm.log.f64(double %conv) -+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv -+ store double %call, double* %arrayidx, align 4 -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond = icmp eq i64 %iv.next, 1000 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -+ -+define void @log_f32_intrinsic(float* nocapture %varray) { -+; CHECK-LABEL: @log_f32_intrinsic( -+; CHECK: [[TMP1:%.*]] = call intel_svmlcc <8 x float> @__svml_logf8_ha(<8 x float> [[TMP2:%.*]]) -+; CHECK: ret void -+; -+entry: -+ br label 
%for.body
-+
-+for.body:
-+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-+  %tmp = trunc i64 %iv to i32
-+  %conv = sitofp i32 %tmp to float
-+  %call = tail call float @llvm.log.f32(float %conv)
-+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-+  store float %call, float* %arrayidx, align 4
-+  %iv.next = add nuw nsw i64 %iv, 1
-+  %exitcond = icmp eq i64 %iv.next, 1000
-+  br i1 %exitcond, label %for.end, label %for.body
-+
-+for.end:
-+  ret void
-+}
-+
-+attributes #0 = { nounwind readnone }
-+
-diff --git a/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll b/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
-new file mode 100644
-index 00000000..007eea7a
---- /dev/null
-+++ b/test/Transforms/LoopVectorize/X86/svml-legal-codegen.ll
-@@ -0,0 +1,61 @@
-+; Check that vector codegen splits illegal sin8 call to two sin4 calls on AVX for double datatype.
-+; The C code used to generate this test:
-+
-+; #include <math.h>
-+;
-+; void foo(double *a, int N){
-+;   int i;
-+; #pragma clang loop vectorize_width(8)
-+;   for (i=0;i<N;i++){
-+;     a[i] = sin(i);
-+;   }
-+; }
-+
-+; RUN: opt -vector-library=SVML -mattr=avx -loop-vectorize -S < %s | FileCheck %s
-+
-+; CHECK: [[I1:%.*]] = sitofp <8 x i32> [[I0:%.*]] to <8 x double>
-+; CHECK-NEXT: [[S1:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-+; CHECK-NEXT: [[I2:%.*]] = call fast intel_svmlcc <4 x double> @__svml_sin4(<4 x double> [[S1]])
-+; CHECK-NEXT: [[S2:%shuffle.*]] = shufflevector <8 x double> [[I1]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-+; CHECK-NEXT: [[I3:%.*]] = call fast intel_svmlcc <4 x double> @__svml_sin4(<4 x double> [[S2]])
-+; CHECK-NEXT: [[comb:%combined.*]] = shufflevector <4 x double> [[I2]], <4 x double> [[I3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-+; CHECK: store <8 x double> [[comb]], <8 x double>* [[TMP:%.*]], align 8
-+
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+; Function Attrs: nounwind uwtable
-+define dso_local void @foo(double* nocapture %a, i32 %N) local_unnamed_addr #0 {
-+entry:
-+  %cmp5 = icmp sgt i32 %N, 0
-+  br i1 %cmp5, label %for.body.preheader, label %for.end
-+
-+for.body.preheader: ; preds = %entry
-+  %wide.trip.count = zext i32 %N to i64
-+  br label %for.body
-+
-+for.body: ; preds = %for.body, %for.body.preheader
-+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-+  %0 = trunc i64 %indvars.iv to i32
-+  %conv = sitofp i32 %0 to double
-+  %call = tail call fast double @sin(double %conv) #2
-+  %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv
-+  store double %call, double* %arrayidx, align 8, !tbaa !2
-+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
-+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
-+
-+for.end: ; preds = %for.body, %entry
-+  ret void
-+}
-+
-+; Function Attrs: nounwind
-+declare dso_local double @sin(double) local_unnamed_addr #1
-+
-+!2 = !{!3, !3, i64 0}
-+!3 = !{!"double", !4, i64 0}
-+!4 = !{!"omnipotent char", !5, i64 0}
-+!5 = !{!"Simple C/C++ TBAA"}
-+!6 = distinct !{!6, !7}
-+!7 = !{!"llvm.loop.vectorize.width", i32 8}
-diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
-index 0428249f..206ddcd0 100644
---- a/utils/TableGen/CMakeLists.txt
-+++ b/utils/TableGen/CMakeLists.txt
-@@ -38,6 +38,7 @@ add_tablegen(llvm-tblgen LLVM
-   SearchableTableEmitter.cpp
-   SubtargetEmitter.cpp
-   SubtargetFeatureInfo.cpp
-+  SVMLEmitter.cpp
-   TableGen.cpp
-   Types.cpp
-   X86DisassemblerTables.cpp
-diff --git a/utils/TableGen/SVMLEmitter.cpp b/utils/TableGen/SVMLEmitter.cpp
-new file mode 100644
-index 00000000..8800ca82
---- /dev/null -+++ b/utils/TableGen/SVMLEmitter.cpp -@@ -0,0 +1,110 @@ -+//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This tablegen backend emits the scalar to svml function map for TLI. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "CodeGenTarget.h" -+#include "llvm/Support/Format.h" -+#include "llvm/TableGen/Error.h" -+#include "llvm/TableGen/Record.h" -+#include "llvm/TableGen/TableGenBackend.h" -+#include -+#include -+ -+using namespace llvm; -+ -+#define DEBUG_TYPE "SVMLVariants" -+#include "llvm/Support/Debug.h" -+ -+namespace { -+ -+class SVMLVariantsEmitter { -+ -+ RecordKeeper &Records; -+ -+private: -+ void emitSVMLVariants(raw_ostream &OS); -+ -+public: -+ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {} -+ -+ void run(raw_ostream &OS); -+}; -+} // End anonymous namespace -+ -+/// \brief Emit the set of SVML variant function names. -+// The default is to emit the high accuracy SVML variants until a mechanism is -+// introduced to allow a selection of different variants through precision -+// requirements specified by the user. This code generates mappings to svml -+// that are in the scalar form of llvm intrinsics, math library calls, or the -+// finite variants of math library calls. -+void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) { -+ -+ const unsigned MinSinglePrecVL = 4; -+ const unsigned MaxSinglePrecVL = 16; -+ const unsigned MinDoublePrecVL = 2; -+ const unsigned MaxDoublePrecVL = 8; -+ -+ OS << "#ifdef GET_SVML_VARIANTS\n"; -+ -+ for (const auto &D : Records.getAllDerivedDefinitions("SvmlVariant")) { -+ StringRef SvmlVariantNameStr = D->getName(); -+ // Single Precision SVML -+ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "f" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << VL << "},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << VL << "},\n"; -+ -+ // Emit the finite math library function to svml function entry. -+ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " -+ << VL << "},\n"; -+ } -+ -+ // Double Precision SVML -+ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) { -+ // Emit the scalar math library function to svml function entry. -+ OS << "{\"" << SvmlVariantNameStr << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL -+ << "},\n"; -+ -+ // Emit the scalar intrinsic to svml function entry. -+ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL -+ << "},\n"; -+ -+ // Emit the finite math library function to svml function entry. 
-+ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", "; -+ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " -+ << VL << "},\n"; -+ } -+ } -+ -+ OS << "#endif // GET_SVML_VARIANTS\n\n"; -+} -+ -+void SVMLVariantsEmitter::run(raw_ostream &OS) { -+ emitSVMLVariants(OS); -+} -+ -+namespace llvm { -+ -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) { -+ SVMLVariantsEmitter(RK).run(OS); -+} -+ -+} // End llvm namespace -diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp -index b7826062..bbb164ea 100644 ---- a/utils/TableGen/TableGen.cpp -+++ b/utils/TableGen/TableGen.cpp -@@ -53,6 +53,7 @@ enum ActionType { - GenX86EVEX2VEXTables, - GenX86FoldTables, - GenRegisterBank, -+ GenSVMLVariants, - }; - - namespace { -@@ -117,7 +118,9 @@ namespace { - clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", - "Generate X86 fold tables"), - clEnumValN(GenRegisterBank, "gen-register-bank", -- "Generate registers bank descriptions"))); -+ "Generate registers bank descriptions"), -+ clEnumValN(GenSVMLVariants, "gen-svml", -+ "Generate SVML variant function names"))); - - cl::OptionCategory PrintEnumsCat("Options for -print-enums"); - cl::opt -@@ -231,6 +234,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { - case GenX86FoldTables: - EmitX86FoldTables(Records, OS); - break; -+ case GenSVMLVariants: -+ EmitSVMLVariants(Records, OS); -+ break; - } - - return false; -diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h -index 1329a6d8..8b808de0 100644 ---- a/utils/TableGen/TableGenBackends.h -+++ b/utils/TableGen/TableGenBackends.h -@@ -89,6 +89,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); - void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); - void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); - void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); -+void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS); - - } // End llvm namespace - -diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim -index d58ffb21..a9b60f12 100644 ---- a/utils/vim/syntax/llvm.vim -+++ b/utils/vim/syntax/llvm.vim -@@ -94,6 +94,7 @@ syn keyword llvmKeyword - \ inreg - \ inteldialect - \ intel_ocl_bicc -+ \ intel_svmlcc - \ internal - \ linkonce - \ linkonce_odr diff --git a/conda-recipes/D47188-svml.patch b/conda-recipes/D47188-svml.patch new file mode 100644 index 000000000..9d90ae87c --- /dev/null +++ b/conda-recipes/D47188-svml.patch @@ -0,0 +1,821 @@ +From https://reviews.llvm.org/D47188 rebased on top of LLVM 6.0.0 +With additional hot-fix in LoopVectorize.cpp for numba/numba#3016 + +diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h +index a3fe834..124b81d 100644 +--- a/include/llvm/Analysis/TargetLibraryInfo.h ++++ b/include/llvm/Analysis/TargetLibraryInfo.h +@@ -38,6 +38,12 @@ struct VecDesc { + NumLibFuncs + }; + ++enum SVMLAccuracy { ++ SVML_DEFAULT, ++ SVML_HA, ++ SVML_EP ++}; ++ + /// Implementation of the target library information. + /// + /// This class constructs tables that hold the target library information and +@@ -150,7 +156,8 @@ public: + /// Return true if the function F has a vector equivalent with vectorization + /// factor VF. 
+ bool isFunctionVectorizable(StringRef F, unsigned VF) const {
+- return !getVectorizedFunction(F, VF).empty();
++ bool IgnoreMeThere;
++ return !getVectorizedFunction(F, VF, IgnoreMeThere, false).empty();
+ }
+
+ /// Return true if the function F has a vector equivalent with any
+@@ -159,7 +166,8 @@ public:
+
+ /// Return the name of the equivalent of F, vectorized with factor VF. If no
+ /// such mapping exists, return the empty string.
+- StringRef getVectorizedFunction(StringRef F, unsigned VF) const;
++ std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML,
++ bool IsFast) const;
+
+ /// Return true if the function F has a scalar equivalent, and set VF to be
+ /// the vectorization factor.
+@@ -253,8 +261,9 @@ public:
+ bool isFunctionVectorizable(StringRef F) const {
+ return Impl->isFunctionVectorizable(F);
+ }
+- StringRef getVectorizedFunction(StringRef F, unsigned VF) const {
+- return Impl->getVectorizedFunction(F, VF);
++ std::string getVectorizedFunction(StringRef F, unsigned VF, bool &FromSVML,
++ bool IsFast) const {
++ return Impl->getVectorizedFunction(F, VF, FromSVML, IsFast);
+ }
+
+ /// Tests if the function is both available and a candidate for optimized code
+diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt
+index cf75d58..374fd65 100644
+--- a/include/llvm/IR/CMakeLists.txt
++++ b/include/llvm/IR/CMakeLists.txt
+@@ -4,3 +4,7 @@ tablegen(LLVM Attributes.gen -gen-attrs)
+ set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
+ tablegen(LLVM Intrinsics.gen -gen-intrinsic)
+ add_public_tablegen_target(intrinsics_gen)
++
++set(LLVM_TARGET_DEFINITIONS SVML.td)
++tablegen(LLVM SVML.gen -gen-svml)
++add_public_tablegen_target(svml_gen)
+diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
+index 84fe836..46700f0 100644
+--- a/include/llvm/IR/CallingConv.h
++++ b/include/llvm/IR/CallingConv.h
+@@ -220,6 +220,9 @@ namespace CallingConv {
+ /// shader if tessellation is in use, or otherwise the vertex shader.
+ AMDGPU_ES = 96,
+
++ /// Intel_SVML - Calling conventions for Intel Short Math Vector Library
++ Intel_SVML = 97,
++ //
+ /// The highest possible calling convention ID. Must be some 2^k - 1.
+ MaxID = 1023
+ };
+diff --git a/include/llvm/IR/SVML.td b/include/llvm/IR/SVML.td
+new file mode 100644
+index 0000000..90f2902
+--- /dev/null
++++ b/include/llvm/IR/SVML.td
+@@ -0,0 +1,62 @@
++//===-- Intel_SVML.td - Defines SVML call variants ---------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file is used by TableGen to define the different types of SVML function
++// variants used with -fveclib=SVML.
++// ++//===----------------------------------------------------------------------===// ++ ++class SvmlVariant; ++ ++def sin : SvmlVariant; ++def cos : SvmlVariant; ++def pow : SvmlVariant; ++def exp : SvmlVariant; ++def log : SvmlVariant; ++def acos : SvmlVariant; ++def acosh : SvmlVariant; ++def asin : SvmlVariant; ++def asinh : SvmlVariant; ++def atan2 : SvmlVariant; ++def atan : SvmlVariant; ++def atanh : SvmlVariant; ++def cbrt : SvmlVariant; ++def cdfnorm : SvmlVariant; ++def cdfnorminv : SvmlVariant; ++def cosd : SvmlVariant; ++def cosh : SvmlVariant; ++def erf : SvmlVariant; ++def erfc : SvmlVariant; ++def erfcinv : SvmlVariant; ++def erfinv : SvmlVariant; ++def exp10 : SvmlVariant; ++def exp2 : SvmlVariant; ++def expm1 : SvmlVariant; ++def hypot : SvmlVariant; ++def invsqrt : SvmlVariant; ++def log10 : SvmlVariant; ++def log1p : SvmlVariant; ++def log2 : SvmlVariant; ++def sind : SvmlVariant; ++def sinh : SvmlVariant; ++def sqrt : SvmlVariant; ++def tan : SvmlVariant; ++def tanh : SvmlVariant; ++ ++// While SVML doesn't provide _ha versions of ++// the following symbols let's disable their vectorization. ++ ++// def nearbyint : SvmlVariant; ++// def logb : SvmlVariant; ++// def floor : SvmlVariant; ++// def fmod : SvmlVariant; ++// def ceil : SvmlVariant; ++// def trunc : SvmlVariant; ++// def rint : SvmlVariant; ++// def round : SvmlVariant; +diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt +index af2e30d..fa8aaac 100644 +--- a/lib/Analysis/CMakeLists.txt ++++ b/lib/Analysis/CMakeLists.txt +@@ -90,4 +90,5 @@ add_llvm_library(LLVMAnalysis + + DEPENDS + intrinsics_gen ++ svml_gen + ) +diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp +index d18246a..3d108d8 100644 +--- a/lib/Analysis/TargetLibraryInfo.cpp ++++ b/lib/Analysis/TargetLibraryInfo.cpp +@@ -50,6 +50,11 @@ static bool hasSinCosPiStret(const Triple &T) { + return true; + } + ++std::string svmlMangle(StringRef FnName, const bool IsFast) { ++ std::string FullName = FnName; ++ return IsFast ? FullName : FullName + "_ha"; ++} ++ + /// Initialize the set of available library functions based on the specified + /// target triple. This should be carefully written so that a missing target + /// triple gets a sane set of defaults. 
+@@ -1379,93 +1384,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( + } + case SVML: { + const VecDesc VecFuncs[] = { +- {"sin", "__svml_sin2", 2}, +- {"sin", "__svml_sin4", 4}, +- {"sin", "__svml_sin8", 8}, +- +- {"sinf", "__svml_sinf4", 4}, +- {"sinf", "__svml_sinf8", 8}, +- {"sinf", "__svml_sinf16", 16}, +- +- {"cos", "__svml_cos2", 2}, +- {"cos", "__svml_cos4", 4}, +- {"cos", "__svml_cos8", 8}, +- +- {"cosf", "__svml_cosf4", 4}, +- {"cosf", "__svml_cosf8", 8}, +- {"cosf", "__svml_cosf16", 16}, +- +- {"pow", "__svml_pow2", 2}, +- {"pow", "__svml_pow4", 4}, +- {"pow", "__svml_pow8", 8}, +- +- {"powf", "__svml_powf4", 4}, +- {"powf", "__svml_powf8", 8}, +- {"powf", "__svml_powf16", 16}, +- +- { "__pow_finite", "__svml_pow2", 2 }, +- { "__pow_finite", "__svml_pow4", 4 }, +- { "__pow_finite", "__svml_pow8", 8 }, +- +- { "__powf_finite", "__svml_powf4", 4 }, +- { "__powf_finite", "__svml_powf8", 8 }, +- { "__powf_finite", "__svml_powf16", 16 }, +- +- {"llvm.pow.f64", "__svml_pow2", 2}, +- {"llvm.pow.f64", "__svml_pow4", 4}, +- {"llvm.pow.f64", "__svml_pow8", 8}, +- +- {"llvm.pow.f32", "__svml_powf4", 4}, +- {"llvm.pow.f32", "__svml_powf8", 8}, +- {"llvm.pow.f32", "__svml_powf16", 16}, +- +- {"exp", "__svml_exp2", 2}, +- {"exp", "__svml_exp4", 4}, +- {"exp", "__svml_exp8", 8}, +- +- {"expf", "__svml_expf4", 4}, +- {"expf", "__svml_expf8", 8}, +- {"expf", "__svml_expf16", 16}, +- +- { "__exp_finite", "__svml_exp2", 2 }, +- { "__exp_finite", "__svml_exp4", 4 }, +- { "__exp_finite", "__svml_exp8", 8 }, +- +- { "__expf_finite", "__svml_expf4", 4 }, +- { "__expf_finite", "__svml_expf8", 8 }, +- { "__expf_finite", "__svml_expf16", 16 }, +- +- {"llvm.exp.f64", "__svml_exp2", 2}, +- {"llvm.exp.f64", "__svml_exp4", 4}, +- {"llvm.exp.f64", "__svml_exp8", 8}, +- +- {"llvm.exp.f32", "__svml_expf4", 4}, +- {"llvm.exp.f32", "__svml_expf8", 8}, +- {"llvm.exp.f32", "__svml_expf16", 16}, +- +- {"log", "__svml_log2", 2}, +- {"log", "__svml_log4", 4}, +- {"log", "__svml_log8", 8}, +- +- {"logf", "__svml_logf4", 4}, +- {"logf", "__svml_logf8", 8}, +- {"logf", "__svml_logf16", 16}, +- +- { "__log_finite", "__svml_log2", 2 }, +- { "__log_finite", "__svml_log4", 4 }, +- { "__log_finite", "__svml_log8", 8 }, +- +- { "__logf_finite", "__svml_logf4", 4 }, +- { "__logf_finite", "__svml_logf8", 8 }, +- { "__logf_finite", "__svml_logf16", 16 }, +- +- {"llvm.log.f64", "__svml_log2", 2}, +- {"llvm.log.f64", "__svml_log4", 4}, +- {"llvm.log.f64", "__svml_log8", 8}, +- +- {"llvm.log.f32", "__svml_logf4", 4}, +- {"llvm.log.f32", "__svml_logf8", 8}, +- {"llvm.log.f32", "__svml_logf16", 16}, ++#define GET_SVML_VARIANTS ++#include "llvm/IR/SVML.gen" ++#undef GET_SVML_VARIANTS + }; + addVectorizableFunctions(VecFuncs); + break; +@@ -1486,16 +1407,21 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { + return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; + } + +-StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, +- unsigned VF) const { ++std::string TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, ++ unsigned VF, bool &FromSVML, bool IsFast) const { ++ FromSVML = ClVectorLibrary == SVML; + F = sanitizeFunctionName(F); + if (F.empty()) + return F; + std::vector::const_iterator I = std::lower_bound( + VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName); + while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { +- if (I->VectorizationFactor == VF) ++ if (I->VectorizationFactor == VF) { ++ if (FromSVML) { ++ 
return svmlMangle(I->VectorFnName, IsFast); ++ } + return I->VectorFnName; ++ } + ++I; + } + return StringRef(); +diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp +index d8be4ad..945d5f6 100644 +--- a/lib/AsmParser/LLLexer.cpp ++++ b/lib/AsmParser/LLLexer.cpp +@@ -592,6 +592,7 @@ lltok::Kind LLLexer::LexIdentifier() { + KEYWORD(spir_kernel); + KEYWORD(spir_func); + KEYWORD(intel_ocl_bicc); ++ KEYWORD(intel_svmlcc); + KEYWORD(x86_64_sysvcc); + KEYWORD(win64cc); + KEYWORD(x86_regcallcc); +diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp +index c3ab955..c1d9fa0 100644 +--- a/lib/AsmParser/LLParser.cpp ++++ b/lib/AsmParser/LLParser.cpp +@@ -1711,6 +1711,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { + /// ::= 'ccc' + /// ::= 'fastcc' + /// ::= 'intel_ocl_bicc' ++/// ::= 'intel_svmlcc' + /// ::= 'coldcc' + /// ::= 'x86_stdcallcc' + /// ::= 'x86_fastcallcc' +@@ -1770,6 +1771,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { + case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; + case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; + case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; ++ case lltok::kw_intel_svmlcc: CC = CallingConv::Intel_SVML; break; + case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; + case lltok::kw_win64cc: CC = CallingConv::Win64; break; + case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; +diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h +index ad826cc..08170f0 100644 +--- a/lib/AsmParser/LLToken.h ++++ b/lib/AsmParser/LLToken.h +@@ -130,6 +130,7 @@ enum Kind { + kw_fastcc, + kw_coldcc, + kw_intel_ocl_bicc, ++ kw_intel_svmlcc, + kw_x86_stdcallcc, + kw_x86_fastcallcc, + kw_x86_thiscallcc, +diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp +index 0fafe82..086aabc 100644 +--- a/lib/IR/AsmWriter.cpp ++++ b/lib/IR/AsmWriter.cpp +@@ -356,6 +356,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { + case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break; + case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break; + case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; ++ case CallingConv::Intel_SVML: Out << "intel_svmlcc"; break; + case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; + case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; + case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; +diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp +index 1754f7d..77fbe7e 100644 +--- a/lib/IR/Verifier.cpp ++++ b/lib/IR/Verifier.cpp +@@ -2025,6 +2025,7 @@ void Verifier::visitFunction(const Function &F) { + case CallingConv::Fast: + case CallingConv::Cold: + case CallingConv::Intel_OCL_BI: ++ case CallingConv::Intel_SVML: + case CallingConv::PTX_Kernel: + case CallingConv::PTX_Device: + Assert(!F.isVarArg(), "Calling convention does not support varargs or " +diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td +index 5d806fe..5db30d9 100644 +--- a/lib/Target/X86/X86CallingConv.td ++++ b/lib/Target/X86/X86CallingConv.td +@@ -469,12 +469,29 @@ def RetCC_X86_64 : CallingConv<[ + CCDelegateTo + ]>; + ++// Intel_SVML return-value convention. 
++def RetCC_Intel_SVML : CallingConv<[ ++ // Vector types are returned in XMM0,XMM1 ++ CCIfType<[v4f32, v2f64], ++ CCAssignToReg<[XMM0,XMM1]>>, ++ ++ // 256-bit FP vectors ++ CCIfType<[v8f32, v4f64], ++ CCAssignToReg<[YMM0,YMM1]>>, ++ ++ // 512-bit FP vectors ++ CCIfType<[v16f32, v8f64], ++ CCAssignToReg<[ZMM0,ZMM1]>> ++]>; ++ + // This is the return-value convention used for the entire X86 backend. + def RetCC_X86 : CallingConv<[ + + // Check if this is the Intel OpenCL built-ins calling convention + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, + ++ CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo>, ++ + CCIfSubtarget<"is64Bit()", CCDelegateTo>, + CCDelegateTo + ]>; +@@ -971,6 +988,22 @@ def CC_Intel_OCL_BI : CallingConv<[ + CCDelegateTo + ]>; + ++// X86-64 Intel Short Vector Math Library calling convention. ++def CC_Intel_SVML : CallingConv<[ ++ ++ // The SSE vector arguments are passed in XMM registers. ++ CCIfType<[v4f32, v2f64], ++ CCAssignToReg<[XMM0, XMM1, XMM2]>>, ++ ++ // The 256-bit vector arguments are passed in YMM registers. ++ CCIfType<[v8f32, v4f64], ++ CCAssignToReg<[YMM0, YMM1, YMM2]>>, ++ ++ // The 512-bit vector arguments are passed in ZMM registers. ++ CCIfType<[v16f32, v8f64], ++ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>> ++]>; ++ + def CC_X86_32_Intr : CallingConv<[ + CCAssignToStack<4, 4> + ]>; +@@ -1027,6 +1060,7 @@ def CC_X86_64 : CallingConv<[ + // This is the argument convention used for the entire X86 backend. + def CC_X86 : CallingConv<[ + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo>, ++ CCIfCC<"CallingConv::Intel_SVML", CCDelegateTo>, + CCIfSubtarget<"is64Bit()", CCDelegateTo>, + CCDelegateTo + ]>; +@@ -1135,4 +1169,27 @@ def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP, + (sequence "R%u", 12, 15))>; + def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE, + (sequence "XMM%u", 8, 15))>; +- ++ ++// SVML calling convention ++def CSR_32_Intel_SVML : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE)>; ++def CSR_32_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_32_Intel_SVML, ++ K4, K5, K6, K7)>; ++ ++def CSR_64_Intel_SVML_NoSSE : CalleeSavedRegs<(add RBX, RSI, RDI, RBP, RSP, R12, R13, R14, R15)>; ++ ++def CSR_64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "XMM%u", 8, 15))>; ++def CSR_Win64_Intel_SVML : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "XMM%u", 6, 15))>; ++ ++def CSR_64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "YMM%u", 8, 15))>; ++def CSR_Win64_Intel_SVML_AVX : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "YMM%u", 6, 15))>; ++ ++def CSR_64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "ZMM%u", 16, 31), ++ K4, K5, K6, K7)>; ++def CSR_Win64_Intel_SVML_AVX512 : CalleeSavedRegs<(add CSR_64_Intel_SVML_NoSSE, ++ (sequence "ZMM%u", 6, 21), ++ K4, K5, K6, K7)>; +diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp +index 10e19f9..5af236a 100644 +--- a/lib/Target/X86/X86ISelLowering.cpp ++++ b/lib/Target/X86/X86ISelLowering.cpp +@@ -3203,7 +3203,8 @@ SDValue X86TargetLowering::LowerFormalArguments( + // FIXME: Only some x86_32 calling conventions support AVX512. 
+ if (Subtarget.hasAVX512() && + (Is64Bit || (CallConv == CallingConv::X86_VectorCall || +- CallConv == CallingConv::Intel_OCL_BI))) ++ CallConv == CallingConv::Intel_OCL_BI || ++ CallConv == CallingConv::Intel_SVML))) + VecVT = MVT::v16f32; + else if (Subtarget.hasAVX()) + VecVT = MVT::v8f32; +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index bc31e95..a8b1fa6 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -311,6 +311,23 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_64_Intel_OCL_BI_SaveList; + break; + } ++ case CallingConv::Intel_SVML: { ++ if (Is64Bit) { ++ if (HasAVX512) ++ return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_SaveList : ++ CSR_64_Intel_SVML_AVX512_SaveList; ++ if (HasAVX) ++ return IsWin64 ? CSR_Win64_Intel_SVML_AVX_SaveList : ++ CSR_64_Intel_SVML_AVX_SaveList; ++ ++ return IsWin64 ? CSR_Win64_Intel_SVML_SaveList : ++ CSR_64_Intel_SVML_SaveList; ++ } else { // Is32Bit ++ if (HasAVX512) ++ return CSR_32_Intel_SVML_AVX512_SaveList; ++ return CSR_32_Intel_SVML_SaveList; ++ } ++ } + case CallingConv::HHVM: + return CSR_64_HHVM_SaveList; + case CallingConv::X86_RegCall: +@@ -425,6 +442,23 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + return CSR_64_Intel_OCL_BI_RegMask; + break; + } ++ case CallingConv::Intel_SVML: { ++ if (Is64Bit) { ++ if (HasAVX512) ++ return IsWin64 ? CSR_Win64_Intel_SVML_AVX512_RegMask : ++ CSR_64_Intel_SVML_AVX512_RegMask; ++ if (HasAVX) ++ return IsWin64 ? CSR_Win64_Intel_SVML_AVX_RegMask : ++ CSR_64_Intel_SVML_AVX_RegMask; ++ ++ return IsWin64 ? CSR_Win64_Intel_SVML_RegMask : ++ CSR_64_Intel_SVML_RegMask; ++ } else { // Is32Bit ++ if (HasAVX512) ++ return CSR_32_Intel_SVML_AVX512_RegMask; ++ return CSR_32_Intel_SVML_RegMask; ++ } ++ } + case CallingConv::HHVM: + return CSR_64_HHVM_RegMask; + case CallingConv::X86_RegCall: +diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h +index 37ffac1..8ad2131 100644 +--- a/lib/Target/X86/X86Subtarget.h ++++ b/lib/Target/X86/X86Subtarget.h +@@ -673,6 +673,7 @@ public: + case CallingConv::X86_ThisCall: + case CallingConv::X86_VectorCall: + case CallingConv::Intel_OCL_BI: ++ case CallingConv::Intel_SVML: + return isTargetWin64(); + // This convention allows using the Win64 convention on other targets. + case CallingConv::Win64: +diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp +index 5bcf0c0..cef0009 100644 +--- a/lib/Transforms/Vectorize/LoopVectorize.cpp ++++ b/lib/Transforms/Vectorize/LoopVectorize.cpp +@@ -3974,6 +3974,17 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF, + if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) + return Cost; + ++ // this goes against LLVM coding philosophy, but it'll stop bleeding ++ bool IgnoreMe; ++ StringRef FuncName = TLI->getVectorizedFunction(FnName, VF, IgnoreMe, true); ++#if LLVM_ON_WIN32 ++ StringRef SvmlPrefix("\0_svml", 6); // nobody knows why symbols are like this ++#else ++ StringRef SvmlPrefix("__svml"); ++#endif ++ if (FuncName.startswith(SvmlPrefix) && !TTI.isTypeLegal(RetTy)) ++ return Cost; ++ + // If the corresponding vector cost is cheaper, return its cost. 
+ unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); + if (VectorCallCost < Cost) { +@@ -4917,6 +4923,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { + } + + Function *VectorF; ++ bool FromSVML = false; + if (UseVectorIntrinsic) { + // Use vector version of the intrinsic. + Type *TysForDecl[] = {CI->getType()}; +@@ -4925,7 +4932,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { + VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); + } else { + // Use vector version of the library call. +- StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); ++ bool IsFast = CI->getFastMathFlags().isFast(); ++ std::string VFnName = TLI->getVectorizedFunction(FnName, VF, FromSVML, IsFast); + assert(!VFnName.empty() && "Vector function name is empty."); + VectorF = M->getFunction(VFnName); + if (!VectorF) { +@@ -4944,7 +4952,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { + + if (isa(V)) + V->copyFastMathFlags(CI); +- ++ if (FromSVML) V->setCallingConv(CallingConv::Intel_SVML); + VectorLoopValueMap.setVectorValue(&I, Part, V); + addMetadata(V, &I); + } +diff --git a/test/Transforms/LoopVectorize/X86/svml-calls.ll b/test/Transforms/LoopVectorize/X86/svml-calls.ll +index 6342a9d..39797c6 100644 +--- a/test/Transforms/LoopVectorize/X86/svml-calls.ll ++++ b/test/Transforms/LoopVectorize/X86/svml-calls.ll +@@ -182,4 +182,44 @@ for.end: ; preds = %for.body + ret void + } + ++; CHECK-LABEL: @atan2_finite ++; CHECK: <8 x double> @__svml_atan28 ++; CHECK: ret ++ ++declare double @__atan2_finite(double, double) local_unnamed_addr #0 ++ ++define void @atan2_finite([100 x double]* nocapture %varray) local_unnamed_addr #0 { ++entry: ++ br label %for.cond1.preheader ++ ++for.cond1.preheader: ; preds = %for.inc7, %entry ++ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc7 ] ++ %0 = trunc i64 %indvars.iv19 to i32 ++ %conv = sitofp i32 %0 to double ++ br label %for.body3 ++ ++for.body3: ; preds = %for.body3, %for.cond1.preheader ++ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] ++ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ++ %1 = trunc i64 %indvars.iv.next to i32 ++ %conv4 = sitofp i32 %1 to double ++ %call = tail call fast double @__atan2_finite(double %conv, double %conv4) ++ %arrayidx6 = getelementptr inbounds [100 x double], [100 x double]* %varray, i64 %indvars.iv19, i64 %indvars.iv ++ store double %call, double* %arrayidx6, align 8 ++ %exitcond = icmp eq i64 %indvars.iv.next, 100 ++ br i1 %exitcond, label %for.inc7, label %for.body3, !llvm.loop !5 ++ ++for.inc7: ; preds = %for.body3 ++ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 ++ %exitcond21 = icmp eq i64 %indvars.iv.next20, 100 ++ br i1 %exitcond21, label %for.end9, label %for.cond1.preheader ++ ++for.end9: ; preds = %for.inc7 ++ ret void ++} ++ + attributes #0 = { nounwind readnone } ++ ++!5 = distinct !{!5, !6, !7} ++!6 = !{!"llvm.loop.vectorize.width", i32 8} ++!7 = !{!"llvm.loop.vectorize.enable", i1 true} +diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt +index 0944d54..7b68420 100644 +--- a/utils/TableGen/CMakeLists.txt ++++ b/utils/TableGen/CMakeLists.txt +@@ -36,6 +36,7 @@ add_tablegen(llvm-tblgen LLVM + SearchableTableEmitter.cpp + SubtargetEmitter.cpp + SubtargetFeatureInfo.cpp ++ SVMLEmitter.cpp + TableGen.cpp + Types.cpp + X86DisassemblerTables.cpp +diff --git a/utils/TableGen/SVMLEmitter.cpp b/utils/TableGen/SVMLEmitter.cpp +new file mode 100644 +index 0000000..c80f055 +--- 
/dev/null ++++ b/utils/TableGen/SVMLEmitter.cpp +@@ -0,0 +1,114 @@ ++//===------ SVMLEmitter.cpp - Generate SVML function variants -------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This tablegen backend emits the scalar to svml function map for TLI. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "CodeGenTarget.h" ++#include "llvm/Support/Format.h" ++#include "llvm/TableGen/Error.h" ++#include "llvm/TableGen/Record.h" ++#include "llvm/TableGen/TableGenBackend.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "SVMLVariants" ++#include "llvm/Support/Debug.h" ++ ++namespace { ++ ++class SVMLVariantsEmitter { ++ ++ RecordKeeper &Records; ++ ++private: ++ void emitSVMLVariants(raw_ostream &OS); ++ ++public: ++ SVMLVariantsEmitter(RecordKeeper &R) : Records(R) {} ++ ++ void run(raw_ostream &OS); ++}; ++} // End anonymous namespace ++ ++/// \brief Emit the set of SVML variant function names. ++// The default is to emit the high accuracy SVML variants until a mechanism is ++// introduced to allow a selection of different variants through precision ++// requirements specified by the user. This code generates mappings to svml ++// that are in the scalar form of llvm intrinsics, math library calls, or the ++// finite variants of math library calls. ++void SVMLVariantsEmitter::emitSVMLVariants(raw_ostream &OS) { ++ ++ unsigned MinSinglePrecVL = 4; ++ unsigned MaxSinglePrecVL = 16; ++ unsigned MinDoublePrecVL = 2; ++ unsigned MaxDoublePrecVL = 8; ++ ++ Record *SvmlVariantsClass = Records.getClass("SvmlVariant"); ++ assert(SvmlVariantsClass && ++ "SvmlVariant class not found in target description file!"); ++ ++ OS << "#ifdef GET_SVML_VARIANTS\n"; ++ ++ for (const auto &D : Records.getDefs()) { ++ std::string SvmlVariantNameStr = D.first; ++ // Single Precision SVML ++ for (unsigned VL = MinSinglePrecVL; VL <= MaxSinglePrecVL; VL *= 2) { ++ // Emit the scalar math library function to svml function entry. ++ OS << "{\"" << SvmlVariantNameStr << "f" << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " ++ << VL << "},\n"; ++ ++ // Emit the scalar intrinsic to svml function entry. ++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f32" << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " ++ << VL << "},\n"; ++ ++ // Emit the finite math library function to svml function entry. ++ OS << "{\"__" << SvmlVariantNameStr << "f_finite" << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << "f" << VL << "\", " ++ << VL << "},\n"; ++ } ++ ++ // Double Precision SVML ++ for (unsigned VL = MinDoublePrecVL; VL <= MaxDoublePrecVL; VL *= 2) { ++ // Emit the scalar math library function to svml function entry. ++ OS << "{\"" << SvmlVariantNameStr << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL ++ << "},\n"; ++ ++ // Emit the scalar intrinsic to svml function entry. ++ OS << "{\"" << "llvm." << SvmlVariantNameStr << ".f64" << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " << VL ++ << "},\n"; ++ ++ // Emit the finite math library function to svml function entry. 
++ OS << "{\"__" << SvmlVariantNameStr << "_finite" << "\", "; ++ OS << "\"" << "__svml_" << SvmlVariantNameStr << VL << "\", " ++ << VL << "},\n"; ++ } ++ } ++ ++ OS << "#endif // GET_SVML_VARIANTS\n\n"; ++} ++ ++void SVMLVariantsEmitter::run(raw_ostream &OS) { ++ emitSVMLVariants(OS); ++} ++ ++namespace llvm { ++ ++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS) { ++ SVMLVariantsEmitter(RK).run(OS); ++} ++ ++} // End llvm namespace +diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp +index b0e0385..3e8cd88 100644 +--- a/utils/TableGen/TableGen.cpp ++++ b/utils/TableGen/TableGen.cpp +@@ -49,6 +49,7 @@ enum ActionType { + GenX86EVEX2VEXTables, + GenX86FoldTables, + GenRegisterBank, ++ GenSVMLVariants, + }; + + namespace { +@@ -105,7 +106,9 @@ namespace { + clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", + "Generate X86 fold tables"), + clEnumValN(GenRegisterBank, "gen-register-bank", +- "Generate registers bank descriptions"))); ++ "Generate registers bank descriptions"), ++ clEnumValN(GenSVMLVariants, "gen-svml", ++ "Generate SVML variant function names"))); + + cl::OptionCategory PrintEnumsCat("Options for -print-enums"); + cl::opt +@@ -207,6 +210,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { + case GenX86FoldTables: + EmitX86FoldTables(Records, OS); + break; ++ case GenSVMLVariants: ++ EmitSVMLVariants(Records, OS); ++ break; + } + + return false; +diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h +index 914cd5a..bdf8b4d 100644 +--- a/utils/TableGen/TableGenBackends.h ++++ b/utils/TableGen/TableGenBackends.h +@@ -85,6 +85,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); + void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); + void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); + void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); ++void EmitSVMLVariants(RecordKeeper &RK, raw_ostream &OS); + + } // End llvm namespace + +diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim +index 42a4cf3..9198a6f 100644 +--- a/utils/vim/syntax/llvm.vim ++++ b/utils/vim/syntax/llvm.vim +@@ -92,6 +92,7 @@ syn keyword llvmKeyword + \ inreg + \ inteldialect + \ intel_ocl_bicc ++ \ intel_svmlcc + \ internal + \ linkonce + \ linkonce_odr diff --git a/conda-recipes/llvmdev/meta.yaml b/conda-recipes/llvmdev/meta.yaml index 0789a009d..08f39df37 100644 --- a/conda-recipes/llvmdev/meta.yaml +++ b/conda-recipes/llvmdev/meta.yaml @@ -1,7 +1,18 @@ -{% set shortversion = "7.0" %} -{% set version = "7.0.0" %} -{% set sha256 = "8bc1f844e6cbde1b652c19c1edebc1864456fd9c78b8c1bea038e51b363fe222" %} -{% set build_number = "0" %} +{% set shortversion = "6.0" %} + +{% if ppc64le %} + +{% set version = "6.0.1" %} +{% set sha256 = "b6d6c324f9c71494c0ccaf3dac1f16236d970002b42bb24a6c9e1634f7d0f4e2" %} +{% set build_number = "1" %} + +{% else %} + +{% set version = "6.0.0" %} +{% set sha256 = "1ff53c915b4e761ef400b803f07261ade637b0c269d99569f18040f3dcee4408" %} +{% set build_number = "5" %} + +{% endif %} package: name: llvmdev @@ -15,11 +26,9 @@ source: # http://lists.llvm.org/pipermail/llvm-dev/2016-January/094520.html - ../llvm-lto-static.patch # [win] # Intel SVML optimizations - # The second part of this patch was published as: - # https://reviews.llvm.org/D53035 - # (the first, as mentioned in the patch itself, was: - # https://reviews.llvm.org/D47188) - - ../D47188-svml-VF.patch + - ../D47188-svml.patch + # https://reviews.llvm.org/D44140 Fix LLVM-C symbol export, backport to 6.0.0 from 
upstream + - ../0001-Transforms-Add-missing-header-for-InstructionCombini.patch # [not ppc64le] # undefined behavior bug due to Twine usage - ../twine_cfg_undefined_behavior.patch diff --git a/conda-recipes/llvmdev_manylinux1/meta.yaml b/conda-recipes/llvmdev_manylinux1/meta.yaml index bb4e10dd5..03b6e98c8 100644 --- a/conda-recipes/llvmdev_manylinux1/meta.yaml +++ b/conda-recipes/llvmdev_manylinux1/meta.yaml @@ -1,8 +1,7 @@ -{% set shortversion = "7.0" %} -{% set version = "7.0.0" %} -{% set sha256 = "8bc1f844e6cbde1b652c19c1edebc1864456fd9c78b8c1bea038e51b363fe222" %} -{% set build_number = "0" %} - +{% set shortversion = "6.0" %} +{% set version = "6.0.0" %} +{% set sha256 = "1ff53c915b4e761ef400b803f07261ade637b0c269d99569f18040f3dcee4408" %} +{% set build_number = "2" %} package: name: llvmdev @@ -16,7 +15,9 @@ source: # http://lists.llvm.org/pipermail/llvm-dev/2016-January/094520.html - ../llvm-lto-static.patch # [win] # Intel SVML optimizations - - ../D47188-svml-VF.patch + - ../D47188-svml.patch + # https://reviews.llvm.org/D44140 Fix LLVM-C symbol export + - ../0001-Transforms-Add-missing-header-for-InstructionCombini.patch # undefined behavior bug due to Twine usage - ../twine_cfg_undefined_behavior.patch diff --git a/conda-recipes/llvmlite/meta.yaml b/conda-recipes/llvmlite/meta.yaml index 4e41a91ed..7d61a99bb 100644 --- a/conda-recipes/llvmlite/meta.yaml +++ b/conda-recipes/llvmlite/meta.yaml @@ -25,7 +25,7 @@ requirements: host: - python # On channel https://anaconda.org/numba/ - - llvmdev 7.0* + - llvmdev 6.0* - vs2015_runtime # [win] - enum34 # [py27] # llvmdev is built with libz compression support diff --git a/docs/source/admin-guide/install.rst b/docs/source/admin-guide/install.rst index a08d47dad..e0beb41f0 100644 --- a/docs/source/admin-guide/install.rst +++ b/docs/source/admin-guide/install.rst @@ -73,14 +73,16 @@ The LLVM build process is fully scripted by conda-build_, and the `llvmdev recip The manual instructions below describe the main steps, but refer to the recipe for details: -#. Download the `LLVM 7.0.0 source code `_. +#. Download the `LLVM 6.0.0 source code `_. +(Note that PPC64LE requires LLVM 6.0.1 for specific bug fixes.) #. Download or git checkout the `llvmlite source code `_. #. Decompress the LLVM tar file and apply the following patches from the ``llvmlite/conda-recipes/`` directory: #. ``llvm-lto-static.patch``: Fix issue with LTO shared library on Windows - #. ``D47188-svml-VF.patch``: Add support for vectorized math functions via Intel SVML + #. ``D47188-svml.patch``: Add support for vectorized math functions via Intel SVML + #. ``0001-Transforms-Add-missing-header-for-InstructionCombini.patch``: Fix release bug with LLVM 6.0.0, skip on LLVM 6.0.1. #. ``twine_cfg_undefined_behavior.patch``: Fix obscure memory corruption bug in LLVM that hasn't been fixed in master yet #. For Linux/macOS: diff --git a/docs/source/conf.py b/docs/source/conf.py index 0f70f4b49..787d70b50 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,9 +58,9 @@ # built documents. # # The short X.Y version. -version = '0.25.0' +version = '0.24.0' # The full version, including alpha/beta/rc tags. -release = '0.25.0' +release = '0.24.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -301,5 +301,5 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), - 'llvm': ('http://llvm.org/releases/7.0.0/docs', None), + 'llvm': ('http://llvm.org/releases/6.0.0/docs', None), } diff --git a/docs/source/user-guide/ir/index.rst b/docs/source/user-guide/ir/index.rst index 3eebc5e69..26ed9d053 100644 --- a/docs/source/user-guide/ir/index.rst +++ b/docs/source/user-guide/ir/index.rst @@ -18,7 +18,7 @@ construct a pure Python representation of the IR. To use this module, you should be familiar with the concepts in the `LLVM Language Reference -`_. +`_. .. toctree:: :maxdepth: 1 diff --git a/ffi/build.py b/ffi/build.py index 5933fc3c4..643ff07f2 100755 --- a/ffi/build.py +++ b/ffi/build.py @@ -109,9 +109,9 @@ def main_posix(kind, library_ext): out = out.decode('latin1') print(out) - if not out.startswith('7.0.'): + if not out.startswith('6.0.'): msg = ( - "Building llvmlite requires LLVM 7.0.x. Be sure to " + "Building llvmlite requires LLVM 6.0.x. Be sure to " "set LLVM_CONFIG to the right executable path.\n" "Read the documentation at http://llvmlite.pydata.org/ for more " "information about building llvmlite.\n" diff --git a/llvmlite/tests/test_binding.py b/llvmlite/tests/test_binding.py index 0b50ca603..0f3468d88 100644 --- a/llvmlite/tests/test_binding.py +++ b/llvmlite/tests/test_binding.py @@ -333,7 +333,7 @@ def test_set_option(self): def test_version(self): major, minor, patch = llvm.llvm_version_info - self.assertEqual((major, minor), (7, 0)) + self.assertEqual((major, minor), (6, 0)) self.assertIn(patch, range(10)) def test_check_jit_execution(self): @@ -1041,29 +1041,9 @@ def test_run(self): orig_asm = str(mod) pm.run(mod) opt_asm = str(mod) - # Quick check that optimizations were run, should get: - # define i32 @sum(i32 %.1, i32 %.2) local_unnamed_addr #0 { - # %.X = add i32 %.2, %.1 - # ret i32 %.X - # } - # where X in %.X is 3 or 4 - opt_asm_split = opt_asm.splitlines() - for idx, l in enumerate(opt_asm_split): - if l.strip().startswith('ret i32'): - toks = {'%.3', '%.4'} - for t in toks: - if t in l: - break - else: - raise RuntimeError("expected tokens not found") - add_line = opt_asm_split[idx] - othertoken = (toks ^ {t}).pop() - - self.assertIn("%.3", orig_asm) - self.assertNotIn(othertoken, opt_asm) - break - else: - raise RuntimeError("expected IR not found") + # Quick check that optimizations were run + self.assertIn("%.3", orig_asm) + self.assertNotIn("%.3", opt_asm) class TestFunctionPassManager(BaseTest, PassManagerTestMixin):
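
The simplified check above relies on the named temporary %.3 (the stack slot
present in the unoptimized IR) disappearing once the module pass manager has
run. A minimal standalone sketch of that pattern through the llvmlite binding
layer (assuming a build against LLVM 6.0.x; the @sum IR below is a
hypothetical stand-in for the module the test suite generates):

    import llvmlite.binding as llvm

    llvm.initialize()
    llvm.initialize_native_target()
    llvm.initialize_native_asmprinter()

    ir = r'''
    define i32 @sum(i32 %.1, i32 %.2) {
      %.3 = alloca i32
      %.4 = add i32 %.1, %.2
      store i32 %.4, i32* %.3
      %.5 = load i32, i32* %.3
      ret i32 %.5
    }
    '''

    mod = llvm.parse_assembly(ir)

    # Build an -O2 module pass pipeline.
    pmb = llvm.create_pass_manager_builder()
    pmb.opt_level = 2
    pm = llvm.create_module_pass_manager()
    pmb.populate(pm)

    orig_asm = str(mod)
    pm.run(mod)
    opt_asm = str(mod)

    assert "%.3" in orig_asm      # the alloca exists before optimization
    assert "%.3" not in opt_asm   # promotion passes fold the stack slot away

This is the same assertion shape as the rewritten test_run: presence of the
temporary before pm.run(), absence afterwards, without pinning the exact form
of the optimized body.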