diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 02ec1d5c259cd61..dd37c95eca61a38 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -998,10 +998,32 @@ bool Vectorizer::isSafeToMove(
   LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                     << *ChainBegin << ")\n");
 
-  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
+  assert(isa<LoadInst>(ChainElem) == IsLoadChain &&
+         isa<LoadInst>(ChainBegin) == IsLoadChain);
+
   if (ChainElem == ChainBegin)
     return true;
 
+  if constexpr (IsLoadChain) {
+    // If ChainElem depends on ChainBegin, they're not safe to reorder.
+    SmallVector<Instruction *> Worklist;
+    Worklist.emplace_back(ChainElem);
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.pop_back_val();
+      for (Use &O : I->operands()) {
+        if (isa<PHINode>(O))
+          continue;
+        if (auto *J = dyn_cast<Instruction>(O)) {
+          if (J == ChainBegin) {
+            LLVM_DEBUG(dbgs() << "LSV: dependent loads; not safe to reorder\n");
+            return false;
+          }
+          Worklist.emplace_back(J);
+        }
+      }
+    }
+  }
+
   // Invariant loads can always be reordered; by definition they are not
   // clobbered by stores.
   if (isInvariantLoad(ChainElem))
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
index 833e70814c2917f..0beca8c15305f6d 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
@@ -1,9 +1,22 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -mtriple=aarch64 -passes=load-store-vectorizer \
-; RUN:   -disable-output %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s
+
+; LSV was attempting to vectorize this earlier, but crashed while re-ordering
+; instructions due to the load-load cycle. Now, the candidate loads are no
+; longer considered safe for reordering.
 
 define i32 @load_cycle(ptr %x) {
-; CHECK: Unexpected cycle while re-ordering instructions
+; CHECK-LABEL: define i32 @load_cycle(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 0, i32 1
+; CHECK-NEXT:    [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[LOAD_X_1]], 1
+; CHECK-NEXT:    [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[REM]], i32 0
+; CHECK-NEXT:    [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
 entry:
   %gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 0, i32 1
   %load.x.1 = load i32, ptr %gep.x.1
@@ -13,3 +26,61 @@ entry:
   %ret = add i32 %load.x.2, %load.x.1
   ret i32 %ret
 }
+
+define i32 @load_cycle2(ptr %x, i32 %y) {
+; CHECK-LABEL: define i32 @load_cycle2(
+; CHECK-SAME: ptr [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[Y]], i32 1
+; CHECK-NEXT:    [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[LOAD_X_1]], 2
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[Y]], [[MUL]]
+; CHECK-NEXT:    [[SUB_1:%.*]] = sub i32 [[ADD]], [[LOAD_X_1]]
+; CHECK-NEXT:    [[SUB_2:%.*]] = sub i32 [[SUB_1]], [[LOAD_X_1]]
+; CHECK-NEXT:    [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[SUB_2]], i32 0
+; CHECK-NEXT:    [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+entry:
+  %gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 %y, i32 1
+  %load.x.1 = load i32, ptr %gep.x.1
+  %mul = mul i32 %load.x.1, 2
+  %add = add i32 %y, %mul
+  %sub.1 = sub i32 %add, %load.x.1
+  %sub.2 = sub i32 %sub.1, %load.x.1
+  %gep.x.2 = getelementptr inbounds [2 x i32], ptr %x, i32 %sub.2, i32 0
+  %load.x.2 = load i32, ptr %gep.x.2
+  %ret = add i32 %load.x.2, %load.x.1
+  ret i32 %ret
+}
+
+@global.1 = global i32 0
+@global.2 = global [1 x [3 x i32]] zeroinitializer
+
+define i16 @load_cycle3() {
+; CHECK-LABEL: define i16 @load_cycle3() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LOAD_1:%.*]] = load i32, ptr @global.1, align 4
+; CHECK-NEXT:    [[UREM_1:%.*]] = urem i32 [[LOAD_1]], 1
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_1]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_1]], i32 0, i32 2
+; CHECK-NEXT:    [[LOAD_2:%.*]] = load i32, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[UREM_2:%.*]] = urem i32 [[LOAD_2]], 1
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_2]]
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_3]], i32 0, i32 1
+; CHECK-NEXT:    [[LOAD_3:%.*]] = load i32, ptr [[GEP_4]], align 4
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  %load.1 = load i32, ptr @global.1
+  %urem.1 = urem i32 %load.1, 1
+  %gep.1 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.1
+  %gep.2 = getelementptr inbounds [3 x i32], ptr %gep.1, i32 0, i32 2
+  %load.2 = load i32, ptr %gep.2
+  %urem.2 = urem i32 %load.2, 1
+  %gep.3 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.2
+  %gep.4 = getelementptr inbounds [3 x i32], ptr %gep.3, i32 0, i32 1
+  %load.3 = load i32, ptr %gep.4
+  ret i16 0
+}