From 5a424741f5bdfe8993989bd9e05b384820228f72 Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Sat, 3 Jun 2017 13:42:07 -0400
Subject: [PATCH] Backport LLVM patches to fix AVX on i686

This fixes a bug in the patch that fixes #19976; the bug causes encoding errors on 32-bit x86
and segfaults when AVX/AVX2 is enabled.

Ref LLVM bug report https://bugs.llvm.org/show_bug.cgi?id=29010
LLVM commit https://github.com/llvm-mirror/llvm/commit/83260f239481dfb40d325cf35005c20eeb767b6c
Also ref where I saw this issue in https://github.com/JuliaLang/julia/pull/21849#issuecomment-305950641
---
 deps/llvm.mk                             |  1 +
 deps/patches/llvm-PR29010-i386-xmm.patch | 80 ++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 deps/patches/llvm-PR29010-i386-xmm.patch

diff --git a/deps/llvm.mk b/deps/llvm.mk
index 5e530f4e2cd7e..e8ea59fcd72f4 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -499,6 +499,7 @@ $(eval $(call LLVM_PATCH,llvm-D28786-callclearance))
 $(eval $(call LLVM_PATCH,llvm-rL293230-icc17-cmake)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D32593))
 $(eval $(call LLVM_PATCH,llvm-D33179))
+$(eval $(call LLVM_PATCH,llvm-PR29010-i386-xmm)) # Remove for 4.0
 endif # LLVM_VER
 
 ifeq ($(LLVM_VER),3.7.1)
diff --git a/deps/patches/llvm-PR29010-i386-xmm.patch b/deps/patches/llvm-PR29010-i386-xmm.patch
new file mode 100644
index 0000000000000..b31f70d365cf0
--- /dev/null
+++ b/deps/patches/llvm-PR29010-i386-xmm.patch
@@ -0,0 +1,80 @@
+From 83260f239481dfb40d325cf35005c20eeb767b6c Mon Sep 17 00:00:00 2001
+From: Marina Yatsina <marina.yatsina@intel.com>
+Date: Wed, 17 Aug 2016 19:07:40 +0000
+Subject: [PATCH] Fix for PR29010
+
+This is a fix for https://llvm.org/bugs/show_bug.cgi?id=29010
+The root cause of the bug is that the register class of the machine instruction operand does not fully reflect which registers can actually be allocated.
+For both i386 and x86_64 the operand's register class is VR128RegClass and thus contains xmm0-xmm15, though on i386 we can only use xmm0-xmm7.
+In order to get the actual allocable registers of the class we need to use RegisterClassInfo.
+
+Differential Revision: https://reviews.llvm.org/D23613
+
+
+
+git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278954 91177308-0d34-0410-b5e6-96231b3b80d8
+---
+ lib/CodeGen/ExecutionDepsFix.cpp |  6 +++++-
+ test/CodeGen/X86/pr29010.ll      | 12 ++++++++++++
+ 2 files changed, 17 insertions(+), 1 deletion(-)
+ create mode 100644 test/CodeGen/X86/pr29010.ll
+
+diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
+index 213dd58a31d..2f173f84d73 100644
+--- a/lib/CodeGen/ExecutionDepsFix.cpp
++++ b/lib/CodeGen/ExecutionDepsFix.cpp
+@@ -26,6 +26,7 @@
+ #include "llvm/CodeGen/LivePhysRegs.h"
+ #include "llvm/CodeGen/MachineFunctionPass.h"
+ #include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/CodeGen/RegisterClassInfo.h"
+ #include "llvm/Support/Allocator.h"
+ #include "llvm/Support/Debug.h"
+ #include "llvm/Support/raw_ostream.h"
+@@ -137,6 +138,7 @@ class ExeDepsFix : public MachineFunctionPass {
+   MachineFunction *MF;
+   const TargetInstrInfo *TII;
+   const TargetRegisterInfo *TRI;
++  RegisterClassInfo RegClassInfo;
+   std::vector<SmallVector<int, 1>> AliasMap;
+   const unsigned NumRegs;
+   LiveReg *LiveRegs;
+@@ -509,7 +511,8 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+   // max clearance or clearance higher than Pref.
+   unsigned MaxClearance = 0;
+   unsigned MaxClearanceReg = OriginalReg;
+-  for (auto Reg : OpRC->getRegisters()) {
++  ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
++  for (auto Reg : Order) {
+     assert(AliasMap[Reg].size() == 1 &&
+            "Reg is expected to be mapped to a single index");
+     int RCrx = *regIndices(Reg).begin();
+@@ -785,6 +788,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+   MF = &mf;
+   TII = MF->getSubtarget().getInstrInfo();
+   TRI = MF->getSubtarget().getRegisterInfo();
++  RegClassInfo.runOnMachineFunction(mf);
+   LiveRegs = nullptr;
+   assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+ 
+diff --git a/test/CodeGen/X86/pr29010.ll b/test/CodeGen/X86/pr29010.ll
+new file mode 100644
+index 00000000000..a2d5ff69a35
+--- /dev/null
++++ b/test/CodeGen/X86/pr29010.ll
+@@ -0,0 +1,12 @@
++; RUN: llc < %s -mtriple=i386-linux -mattr=+avx | FileCheck %s
++
++; In i386 there are only 8 XMMs (xmm0-xmm7), make sure we are not creating illegal XMM
++define float @only_xmm0_7(i32 %arg) {
++top:
++  tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
++  tail call void asm sideeffect "", "~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"()
++  %tmp1 = sitofp i32 %arg to float
++  ret float %tmp1
++;CHECK-LABEL:@only_xmm0_7
++;CHECK: vcvtsi2ssl {{.*}}, {{%xmm[0-7]+}}, {{%xmm[0-7]+}}
++}
+-- 
+2.13.0
+