/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "btBatchedConstraints.h"

#include "LinearMath/btIDebugDraw.h"
#include "LinearMath/btMinMax.h"
#include "LinearMath/btStackAlloc.h"
#include "LinearMath/btQuickprof.h"

#include <string.h>  //for memset

#include <cmath>

const int kNoMerge = -1;  // sentinel mergeIndex: the batch has not been merged into another batch

bool btBatchedConstraints::s_debugDrawBatches = false;

struct btBatchedConstraintInfo
{
	int constraintIndex;
	int numConstraintRows;
	int bodyIds[2];
};

struct btBatchInfo
{
	int numConstraints;
	int mergeIndex;

	btBatchInfo() : numConstraints(0), mergeIndex(kNoMerge) {}
};

bool btBatchedConstraints::validate(btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies) const
{
	//
	// validate: for debugging only. Verify the coloring of bodies: no dynamic body may be touched by more than one batch in any given phase
	//
	int errors = 0;
	const int kUnassignedBatch = -1;

	btAlignedObjectArray<int> bodyBatchId;
	for (int iPhase = 0; iPhase < m_phases.size(); ++iPhase)
	{
		bodyBatchId.resizeNoInitialize(0);
		bodyBatchId.resize(bodies.size(), kUnassignedBatch);
		const Range& phase = m_phases[iPhase];
		for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch)
		{
			const Range& batch = m_batches[iBatch];
			for (int iiCons = batch.begin; iiCons < batch.end; ++iiCons)
			{
				int iCons = m_constraintIndices[iiCons];
				const btSolverConstraint& cons = constraints->at(iCons);
				const btSolverBody& bodyA = bodies[cons.m_solverBodyIdA];
				const btSolverBody& bodyB = bodies[cons.m_solverBodyIdB];
				if (!bodyA.internalGetInvMass().isZero())
				{
					int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdA];
					if (thisBodyBatchId == kUnassignedBatch)
					{
						bodyBatchId[cons.m_solverBodyIdA] = iBatch;
					}
					else if (thisBodyBatchId != iBatch)
					{
						btAssert(!"dynamic body is used in 2 different batches in the same phase");
						errors++;
					}
				}
				if (!bodyB.internalGetInvMass().isZero())
				{
					int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdB];
					if (thisBodyBatchId == kUnassignedBatch)
					{
						bodyBatchId[cons.m_solverBodyIdB] = iBatch;
					}
					else if (thisBodyBatchId != iBatch)
					{
						btAssert(!"dynamic body is used in 2 different batches in the same phase");
						errors++;
					}
				}
			}
		}
	}
	return errors == 0;
}

static void debugDrawSingleBatch(const btBatchedConstraints* bc,
								 btConstraintArray* constraints,
								 const btAlignedObjectArray<btSolverBody>& bodies,
								 int iBatch,
								 const btVector3& color,
								 const btVector3& offset)
{
	if (bc && bc->m_debugDrawer && iBatch < bc->m_batches.size())
	{
		const btBatchedConstraints::Range& b = bc->m_batches[iBatch];
		for (int iiCon = b.begin; iiCon < b.end; ++iiCon)
		{
			int iCon = bc->m_constraintIndices[iiCon];
			const btSolverConstraint& con = constraints->at(iCon);
			int iBody0 = con.m_solverBodyIdA;
			int iBody1 = con.m_solverBodyIdB;
			btVector3 pos0 = bodies[iBody0].getWorldTransform().getOrigin() + offset;
			btVector3 pos1 = bodies[iBody1].getWorldTransform().getOrigin() + offset;
			bc->m_debugDrawer->drawLine(pos0, pos1, color);
		}
	}
}

static void debugDrawPhase(const btBatchedConstraints* bc,
						   btConstraintArray* constraints,
						   const btAlignedObjectArray<btSolverBody>& bodies,
						   int iPhase,
						   const btVector3& color0,
						   const btVector3& color1,
						   const btVector3& offset)
{
	BT_PROFILE("debugDrawPhase");
	if (bc && bc->m_debugDrawer && iPhase < bc->m_phases.size())
	{
		const btBatchedConstraints::Range& phase = bc->m_phases[iPhase];
		for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch)
		{
			float tt = float(iBatch - phase.begin) / float(btMax(1, phase.end - phase.begin - 1));
			btVector3 col = lerp(color0, color1, tt);
			debugDrawSingleBatch(bc, constraints, bodies, iBatch, col, offset);
		}
	}
}

static void debugDrawAllBatches(const btBatchedConstraints* bc,
								btConstraintArray* constraints,
								const btAlignedObjectArray<btSolverBody>& bodies)
{
	BT_PROFILE("debugDrawAllBatches");
	if (bc && bc->m_debugDrawer && bc->m_phases.size() > 0)
	{
		btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT);
		btVector3 bboxMax = -bboxMin;
		for (int iBody = 0; iBody < bodies.size(); ++iBody)
		{
			const btVector3& pos = bodies[iBody].getWorldTransform().getOrigin();
			bboxMin.setMin(pos);
			bboxMax.setMax(pos);
		}
		btVector3 bboxExtent = bboxMax - bboxMin;
		btVector3 offsetBase = btVector3(0, bboxExtent.y() * 1.1f, 0);
		btVector3 offsetStep = btVector3(0, 0, bboxExtent.z() * 1.1f);
		int numPhases = bc->m_phases.size();
		for (int iPhase = 0; iPhase < numPhases; ++iPhase)
		{
			float b = float(iPhase) / float(btMax(1, numPhases - 1));  // btMax guards against divide-by-zero when there is only one phase
			btVector3 color0 = btVector3(1, 0, b);
			btVector3 color1 = btVector3(0, 1, b);
			btVector3 offset = offsetBase + offsetStep * (float(iPhase) - float(numPhases - 1) * 0.5);
			debugDrawPhase(bc, constraints, bodies, iPhase, color0, color1, offset);
		}
	}
}

static void initBatchedBodyDynamicFlags(btAlignedObjectArray<bool>* outBodyDynamicFlags, const btAlignedObjectArray<btSolverBody>& bodies)
{
	BT_PROFILE("initBatchedBodyDynamicFlags");
	btAlignedObjectArray<bool>& bodyDynamicFlags = *outBodyDynamicFlags;
	bodyDynamicFlags.resizeNoInitialize(bodies.size());
	for (int i = 0; i < bodies.size(); ++i)
	{
		const btSolverBody& body = bodies[i];
		bodyDynamicFlags[i] = (body.internalGetInvMass().x() > btScalar(0));
	}
}

static int runLengthEncodeConstraintInfo(btBatchedConstraintInfo* outConInfos, int numConstraints)
{
	BT_PROFILE("runLengthEncodeConstraintInfo");
	// detect and run-length encode constraint rows that repeat the same bodies
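	// Illustrative example (hypothetical values): 4 rows with body pairs
	//   (4,7),(4,7),(4,7),(2,9)
	// collapse into 2 entries:
	//   { constraintIndex=0, bodyIds={4,7}, numConstraintRows=3 }
	//   { constraintIndex=3, bodyIds={2,9}, numConstraintRows=1 }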
	int iDest = 0;
	int iSrc = 0;
	while (iSrc < numConstraints)
	{
		const btBatchedConstraintInfo& srcConInfo = outConInfos[iSrc];
		btBatchedConstraintInfo& conInfo = outConInfos[iDest];
		conInfo.constraintIndex = iSrc;
		conInfo.bodyIds[0] = srcConInfo.bodyIds[0];
		conInfo.bodyIds[1] = srcConInfo.bodyIds[1];
		while (iSrc < numConstraints && outConInfos[iSrc].bodyIds[0] == srcConInfo.bodyIds[0] && outConInfos[iSrc].bodyIds[1] == srcConInfo.bodyIds[1])
		{
			++iSrc;
		}
		conInfo.numConstraintRows = iSrc - conInfo.constraintIndex;
		++iDest;
	}
	return iDest;
}

struct ReadSolverConstraintsLoop : public btIParallelForBody
{
	btBatchedConstraintInfo* m_outConInfos;
	btConstraintArray* m_constraints;

	ReadSolverConstraintsLoop(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints)
	{
		m_outConInfos = outConInfos;
		m_constraints = constraints;
	}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		for (int i = iBegin; i < iEnd; ++i)
		{
			btBatchedConstraintInfo& conInfo = m_outConInfos[i];
			const btSolverConstraint& con = m_constraints->at(i);
			conInfo.bodyIds[0] = con.m_solverBodyIdA;
			conInfo.bodyIds[1] = con.m_solverBodyIdB;
			conInfo.constraintIndex = i;
			conInfo.numConstraintRows = 1;
		}
	}
};

static int initBatchedConstraintInfo(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints)
{
	BT_PROFILE("initBatchedConstraintInfo");
	int numConstraints = constraints->size();
	bool inParallel = true;  // debugging toggle: set to false to use the single-threaded path below
	if (inParallel)
	{
		ReadSolverConstraintsLoop loop(outConInfos, constraints);
		int grainSize = 1200;
		btParallelFor(0, numConstraints, grainSize, loop);
	}
	else
	{
		for (int i = 0; i < numConstraints; ++i)
		{
			btBatchedConstraintInfo& conInfo = outConInfos[i];
			const btSolverConstraint& con = constraints->at(i);
			conInfo.bodyIds[0] = con.m_solverBodyIdA;
			conInfo.bodyIds[1] = con.m_solverBodyIdB;
			conInfo.constraintIndex = i;
			conInfo.numConstraintRows = 1;
		}
	}
	bool useRunLengthEncoding = true;  // debugging toggle: set to false to keep one entry per constraint row
	if (useRunLengthEncoding)
	{
		numConstraints = runLengthEncodeConstraintInfo(outConInfos, numConstraints);
	}
	return numConstraints;
}

static void expandConstraintRowsInPlace(int* constraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
{
	BT_PROFILE("expandConstraintRowsInPlace");
	if (numConstraintRows > numConstraints)
	{
		// we walk the array in reverse to avoid overwriting entries before they have been read
		for (int iCon = numConstraints - 1; iCon >= 0; --iCon)
		{
			const btBatchedConstraintInfo& conInfo = conInfos[iCon];
			int iBatch = constraintBatchIds[iCon];
			for (int i = conInfo.numConstraintRows - 1; i >= 0; --i)
			{
				int iDest = conInfo.constraintIndex + i;
				btAssert(iDest >= iCon);
				btAssert(iDest >= 0 && iDest < numConstraintRows);
				constraintBatchIds[iDest] = iBatch;
			}
		}
	}
}

static void expandConstraintRows(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
{
	BT_PROFILE("expandConstraintRows");
	for (int iCon = 0; iCon < numConstraints; ++iCon)
	{
		const btBatchedConstraintInfo& conInfo = conInfos[iCon];
		int iBatch = srcConstraintBatchIds[iCon];
		for (int i = 0; i < conInfo.numConstraintRows; ++i)
		{
			int iDest = conInfo.constraintIndex + i;
			btAssert(iDest >= iCon);
			btAssert(iDest >= 0 && iDest < numConstraintRows);
			destConstraintBatchIds[iDest] = iBatch;
		}
	}
}

struct ExpandConstraintRowsLoop : public btIParallelForBody
{
	int* m_destConstraintBatchIds;
	const int* m_srcConstraintBatchIds;
	const btBatchedConstraintInfo* m_conInfos;
	int m_numConstraintRows;

	ExpandConstraintRowsLoop(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraintRows)
	{
		m_destConstraintBatchIds = destConstraintBatchIds;
		m_srcConstraintBatchIds = srcConstraintBatchIds;
		m_conInfos = conInfos;
		m_numConstraintRows = numConstraintRows;
	}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		expandConstraintRows(m_destConstraintBatchIds, m_srcConstraintBatchIds + iBegin, m_conInfos + iBegin, iEnd - iBegin, m_numConstraintRows);
	}
};

static void expandConstraintRowsMt(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
{
	BT_PROFILE("expandConstraintRowsMt");
	ExpandConstraintRowsLoop loop(destConstraintBatchIds, srcConstraintBatchIds, conInfos, numConstraintRows);
	int grainSize = 600;
	btParallelFor(0, numConstraints, grainSize, loop);
}

static void initBatchedConstraintInfoArray(btAlignedObjectArray<btBatchedConstraintInfo>* outConInfos, btConstraintArray* constraints)
{
	BT_PROFILE("initBatchedConstraintInfoArray");
	btAlignedObjectArray<btBatchedConstraintInfo>& conInfos = *outConInfos;
	int numConstraints = constraints->size();
	conInfos.resizeNoInitialize(numConstraints);

	int newSize = initBatchedConstraintInfo(&outConInfos->at(0), constraints);
	conInfos.resizeNoInitialize(newSize);
}

static void mergeSmallBatches(btBatchInfo* batches, int iBeginBatch, int iEndBatch, int minBatchSize, int maxBatchSize)
{
	BT_PROFILE("mergeSmallBatches");
	for (int iBatch = iEndBatch - 1; iBatch >= iBeginBatch; --iBatch)
	{
		btBatchInfo& batch = batches[iBatch];
		if (batch.mergeIndex == kNoMerge && batch.numConstraints > 0 && batch.numConstraints < minBatchSize)
		{
			for (int iDestBatch = iBatch - 1; iDestBatch >= iBeginBatch; --iDestBatch)
			{
				btBatchInfo& destBatch = batches[iDestBatch];
				if (destBatch.mergeIndex == kNoMerge && (destBatch.numConstraints + batch.numConstraints) < maxBatchSize)
				{
					destBatch.numConstraints += batch.numConstraints;
					batch.numConstraints = 0;
					batch.mergeIndex = iDestBatch;
					break;
				}
			}
		}
	}
	// flatten mergeIndexes
	// e.g. in case where A was merged into B and then B was merged into C, we need A to point to C instead of B
	// Note: loop goes forward through batches because batches always merge from higher indexes to lower,
	//     so by going from low to high it reduces the amount of trail-following
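	// Illustrative example (hypothetical indices): with mergeIndex chain {2->1, 1->0, 0->kNoMerge},
	// flattening rewrites batch 2's mergeIndex from 1 to 0, so later lookups need only a single hop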
	for (int iBatch = iBeginBatch; iBatch < iEndBatch; ++iBatch)
	{
		btBatchInfo& batch = batches[iBatch];
		if (batch.mergeIndex != kNoMerge)
		{
			int iMergeDest = batches[batch.mergeIndex].mergeIndex;
			// follow trail of merges to the end
			while (iMergeDest != kNoMerge)
			{
				int iNext = batches[iMergeDest].mergeIndex;
				if (iNext == kNoMerge)
				{
					batch.mergeIndex = iMergeDest;
					break;
				}
				iMergeDest = iNext;
			}
		}
	}
}

static void updateConstraintBatchIdsForMerges(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches)
{
	BT_PROFILE("updateConstraintBatchIdsForMerges");
	// update batchIds to account for merges
	for (int i = 0; i < numConstraints; ++i)
	{
		int iBatch = constraintBatchIds[i];
		btAssert(iBatch < numBatches);
		// if this constraint references a batch that was merged into another batch
		if (batches[iBatch].mergeIndex != kNoMerge)
		{
			// update batchId
			constraintBatchIds[i] = batches[iBatch].mergeIndex;
		}
	}
}

struct UpdateConstraintBatchIdsForMergesLoop : public btIParallelForBody
{
	int* m_constraintBatchIds;
	const btBatchInfo* m_batches;
	int m_numBatches;

	UpdateConstraintBatchIdsForMergesLoop(int* constraintBatchIds, const btBatchInfo* batches, int numBatches)
	{
		m_constraintBatchIds = constraintBatchIds;
		m_batches = batches;
		m_numBatches = numBatches;
	}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		BT_PROFILE("UpdateConstraintBatchIdsForMergesLoop");
		updateConstraintBatchIdsForMerges(m_constraintBatchIds + iBegin, iEnd - iBegin, m_batches, m_numBatches);
	}
};

static void updateConstraintBatchIdsForMergesMt(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches)
{
	BT_PROFILE("updateConstraintBatchIdsForMergesMt");
	UpdateConstraintBatchIdsForMergesLoop loop(constraintBatchIds, batches, numBatches);
	int grainSize = 800;
	btParallelFor(0, numConstraints, grainSize, loop);
}

// sort predicate: orders batches from largest to smallest constraint count
inline bool BatchCompare(const btBatchedConstraints::Range& a, const btBatchedConstraints::Range& b)
{
	int lenA = a.end - a.begin;
	int lenB = b.end - b.begin;
	return lenA > lenB;
}

static void writeOutConstraintIndicesForRangeOfBatches(btBatchedConstraints* bc,
													   const int* constraintBatchIds,
													   int numConstraints,
													   int* constraintIdPerBatch,
													   int batchBegin,
													   int batchEnd)
{
	BT_PROFILE("writeOutConstraintIndicesForRangeOfBatches");
	for (int iCon = 0; iCon < numConstraints; ++iCon)
	{
		int iBatch = constraintBatchIds[iCon];
		if (iBatch >= batchBegin && iBatch < batchEnd)
		{
			int iDestCon = constraintIdPerBatch[iBatch];
			constraintIdPerBatch[iBatch] = iDestCon + 1;
			bc->m_constraintIndices[iDestCon] = iCon;
		}
	}
}

struct WriteOutConstraintIndicesLoop : public btIParallelForBody
{
	btBatchedConstraints* m_batchedConstraints;
	const int* m_constraintBatchIds;
	int m_numConstraints;
	int* m_constraintIdPerBatch;
	int m_maxNumBatchesPerPhase;

	WriteOutConstraintIndicesLoop(btBatchedConstraints* bc, const int* constraintBatchIds, int numConstraints, int* constraintIdPerBatch, int maxNumBatchesPerPhase)
	{
		m_batchedConstraints = bc;
		m_constraintBatchIds = constraintBatchIds;
		m_numConstraints = numConstraints;
		m_constraintIdPerBatch = constraintIdPerBatch;
		m_maxNumBatchesPerPhase = maxNumBatchesPerPhase;
	}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		BT_PROFILE("WriteOutConstraintIndicesLoop");
		int batchBegin = iBegin * m_maxNumBatchesPerPhase;
		int batchEnd = iEnd * m_maxNumBatchesPerPhase;
		writeOutConstraintIndicesForRangeOfBatches(m_batchedConstraints,
												   m_constraintBatchIds,
												   m_numConstraints,
												   m_constraintIdPerBatch,
												   batchBegin,
												   batchEnd);
	}
};

static void writeOutConstraintIndicesMt(btBatchedConstraints* bc,
										const int* constraintBatchIds,
										int numConstraints,
										int* constraintIdPerBatch,
										int maxNumBatchesPerPhase,
										int numPhases)
{
	BT_PROFILE("writeOutConstraintIndicesMt");
	bool inParallel = true;  // debugging toggle: set to false to use the single-threaded path below
	if (inParallel)
	{
		WriteOutConstraintIndicesLoop loop(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase);
		btParallelFor(0, numPhases, 1, loop);
	}
	else
	{
		for (int iCon = 0; iCon < numConstraints; ++iCon)
		{
			int iBatch = constraintBatchIds[iCon];
			int iDestCon = constraintIdPerBatch[iBatch];
			constraintIdPerBatch[iBatch] = iDestCon + 1;
			bc->m_constraintIndices[iDestCon] = iCon;
		}
	}
}

static void writeGrainSizes(btBatchedConstraints* bc)
{
	typedef btBatchedConstraints::Range Range;
	int numPhases = bc->m_phases.size();
	bc->m_phaseGrainSize.resizeNoInitialize(numPhases);
	int numThreads = btGetTaskScheduler()->getNumThreads();
	for (int iPhase = 0; iPhase < numPhases; ++iPhase)
	{
		const Range& phase = bc->m_phases[iPhase];
		int numBatches = phase.end - phase.begin;
		// aim for roughly 4 grains per thread so the task scheduler can load-balance
		float grainSize = std::floor(0.25f * numBatches / float(numThreads));
		bc->m_phaseGrainSize[iPhase] = btMax(1, int(grainSize));
	}
}

static void writeOutBatches(btBatchedConstraints* bc,
							const int* constraintBatchIds,
							int numConstraints,
							const btBatchInfo* batches,
							int* batchWork,
							int maxNumBatchesPerPhase,
							int numPhases)
{
	BT_PROFILE("writeOutBatches");
	typedef btBatchedConstraints::Range Range;
	bc->m_constraintIndices.reserve(numConstraints);
	bc->m_batches.resizeNoInitialize(0);
	bc->m_phases.resizeNoInitialize(0);

	//int maxNumBatches = numPhases * maxNumBatchesPerPhase;
	{
		int* constraintIdPerBatch = batchWork;  // for each batch, keep an index into the next available slot in the m_constraintIndices array
		int iConstraint = 0;
		for (int iPhase = 0; iPhase < numPhases; ++iPhase)
		{
			int curPhaseBegin = bc->m_batches.size();
			int iBegin = iPhase * maxNumBatchesPerPhase;
			int iEnd = iBegin + maxNumBatchesPerPhase;
			for (int i = iBegin; i < iEnd; ++i)
			{
				const btBatchInfo& batch = batches[i];
				int curBatchBegin = iConstraint;
				constraintIdPerBatch[i] = curBatchBegin;  // record the start of each batch in m_constraintIndices array
				int numRowsInBatch = batch.numConstraints;  // local name avoids shadowing the numConstraints parameter
				iConstraint += numRowsInBatch;
				if (numRowsInBatch > 0)
				{
					bc->m_batches.push_back(Range(curBatchBegin, iConstraint));
				}
			}
			// if any batches were emitted this phase,
			if (bc->m_batches.size() > curPhaseBegin)
			{
				// output phase
				bc->m_phases.push_back(Range(curPhaseBegin, bc->m_batches.size()));
			}
		}

		btAssert(iConstraint == numConstraints);
		bc->m_constraintIndices.resizeNoInitialize(numConstraints);
		writeOutConstraintIndicesMt(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase, numPhases);
	}
	// for each phase
	for (int iPhase = 0; iPhase < bc->m_phases.size(); ++iPhase)
	{
		// sort the batches from largest to smallest (can be helpful to some task schedulers)
		const Range& curBatches = bc->m_phases[iPhase];
		bc->m_batches.quickSortInternal(BatchCompare, curBatches.begin, curBatches.end - 1);
	}
	bc->m_phaseOrder.resize(bc->m_phases.size());
	for (int i = 0; i < bc->m_phases.size(); ++i)
	{
		bc->m_phaseOrder[i] = i;
	}
	writeGrainSizes(bc);
}

//
// PreallocatedMemoryHelper -- helper object for allocating a number of chunks of memory in a single contiguous block.
//                             It is generally more efficient to do a single larger allocation than many smaller allocations.
//
// Example Usage:
//
//  btVector3* bodyPositions = NULL;
//  btBatchedConstraintInfo* conInfos = NULL;
//  void* memPtr = NULL;
//  {
//    PreallocatedMemoryHelper<8> memHelper;
//    memHelper.addChunk( (void**) &bodyPositions, sizeof( btVector3 ) * bodies.size() );
//    memHelper.addChunk( (void**) &conInfos, sizeof( btBatchedConstraintInfo ) * numConstraints );
//    memPtr = malloc( memHelper.getSizeToAllocate() );  // a single allocation covering all chunks
//    memHelper.setChunkPointers( memPtr );  // points bodyPositions and conInfos into the block
//  }
//  // ... use bodyPositions and conInfos, then release everything with one call:
//  free( memPtr );
template <int N>
class PreallocatedMemoryHelper
{
	struct Chunk
	{
		void** ptr;
		size_t size;
	};
	Chunk m_chunks[N];
	int m_numChunks;

public:
	PreallocatedMemoryHelper() { m_numChunks = 0; }  // m_chunks entries are initialized by addChunk() before use
	void addChunk(void** ptr, size_t sz)
	{
		btAssert(m_numChunks < N);
		if (m_numChunks < N)
		{
			Chunk& chunk = m_chunks[m_numChunks];
			chunk.ptr = ptr;
			chunk.size = sz;
			m_numChunks++;
		}
	}
	size_t getSizeToAllocate() const
	{
		size_t totalSize = 0;
		for (int i = 0; i < m_numChunks; ++i)
		{
			totalSize += m_chunks[i].size;
		}
		return totalSize;
	}
	void setChunkPointers(void* mem) const
	{
		size_t totalSize = 0;
		for (int i = 0; i < m_numChunks; ++i)
		{
			const Chunk& chunk = m_chunks[i];
			char* chunkPtr = static_cast<char*>(mem) + totalSize;
			*chunk.ptr = chunkPtr;
			totalSize += chunk.size;
		}
	}
};

static btVector3 findMaxDynamicConstraintExtent(
	btVector3* bodyPositions,
	bool* bodyDynamicFlags,
	btBatchedConstraintInfo* conInfos,
	int numConstraints,
	int numBodies)
{
	BT_PROFILE("findMaxDynamicConstraintExtent");
	btVector3 consExtent = btVector3(1, 1, 1) * 0.001;
	for (int iCon = 0; iCon < numConstraints; ++iCon)
	{
		const btBatchedConstraintInfo& con = conInfos[iCon];
		int iBody0 = con.bodyIds[0];
		int iBody1 = con.bodyIds[1];
		btAssert(iBody0 >= 0 && iBody0 < numBodies);
		btAssert(iBody1 >= 0 && iBody1 < numBodies);
		// is it a dynamic constraint?
		if (bodyDynamicFlags[iBody0] && bodyDynamicFlags[iBody1])
		{
			btVector3 delta = bodyPositions[iBody1] - bodyPositions[iBody0];
			consExtent.setMax(delta.absolute());
		}
	}
	return consExtent;
}

struct btIntVec3
{
	int m_ints[3];

	SIMD_FORCE_INLINE const int& operator[](int i) const { return m_ints[i]; }
	SIMD_FORCE_INLINE int& operator[](int i) { return m_ints[i]; }
};

struct AssignConstraintsToGridBatchesParams
{
	bool* bodyDynamicFlags;
	btIntVec3* bodyGridCoords;
	int numBodies;
	btBatchedConstraintInfo* conInfos;
	int* constraintBatchIds;
	btIntVec3 gridChunkDim;
	int maxNumBatchesPerPhase;
	int numPhases;
	int phaseMask;

	AssignConstraintsToGridBatchesParams()
	{
		memset(this, 0, sizeof(*this));
	}
};

static void assignConstraintsToGridBatches(const AssignConstraintsToGridBatchesParams& params, int iConBegin, int iConEnd)
{
	BT_PROFILE("assignConstraintsToGridBatches");
	// (can be done in parallel)
	for (int iCon = iConBegin; iCon < iConEnd; ++iCon)
	{
		const btBatchedConstraintInfo& con = params.conInfos[iCon];
		int iBody0 = con.bodyIds[0];
		int iBody1 = con.bodyIds[1];
		int iPhase = iCon;  // pseudorandom choice (iBody0 would also work) to distribute constraints evenly amongst phases
		iPhase &= params.phaseMask;
		int gridCoord[3];
		// is it a dynamic constraint?
		if (params.bodyDynamicFlags[iBody0] && params.bodyDynamicFlags[iBody1])
		{
			const btIntVec3& body0Coords = params.bodyGridCoords[iBody0];
			const btIntVec3& body1Coords = params.bodyGridCoords[iBody1];
			// for each dimension x,y,z,
			for (int i = 0; i < 3; ++i)
			{
				int coordMin = btMin(body0Coords.m_ints[i], body1Coords.m_ints[i]);
				int coordMax = btMax(body0Coords.m_ints[i], body1Coords.m_ints[i]);
				if (coordMin != coordMax)
				{
					btAssert(coordMax == coordMin + 1);
					if ((coordMin & 1) == 0)
					{
						iPhase &= ~(1 << i);  // force bit off
					}
					else
					{
						iPhase |= (1 << i);  // force bit on
						iPhase &= params.phaseMask;
					}
				}
				gridCoord[i] = coordMin;
			}
		}
		else
		{
			if (!params.bodyDynamicFlags[iBody0])
			{
				iBody0 = con.bodyIds[1];
			}
			btAssert(params.bodyDynamicFlags[iBody0]);
			const btIntVec3& body0Coords = params.bodyGridCoords[iBody0];
			// for each dimension x,y,z,
			for (int i = 0; i < 3; ++i)
			{
				gridCoord[i] = body0Coords.m_ints[i];
			}
		}
		// calculate chunk coordinates
		int chunkCoord[3];
		btIntVec3 gridChunkDim = params.gridChunkDim;
		// for each dimension x,y,z,
		for (int i = 0; i < 3; ++i)
		{
			int coordOffset = (iPhase >> i) & 1;
			chunkCoord[i] = (gridCoord[i] - coordOffset) / 2;
			btClamp(chunkCoord[i], 0, gridChunkDim[i] - 1);
			btAssert(chunkCoord[i] < gridChunkDim[i]);
		}
		int iBatch = iPhase * params.maxNumBatchesPerPhase + chunkCoord[0] + chunkCoord[1] * gridChunkDim[0] + chunkCoord[2] * gridChunkDim[0] * gridChunkDim[1];
		btAssert(iBatch >= 0 && iBatch < params.maxNumBatchesPerPhase * params.numPhases);
		params.constraintBatchIds[iCon] = iBatch;
	}
}

struct AssignConstraintsToGridBatchesLoop : public btIParallelForBody
{
	const AssignConstraintsToGridBatchesParams* m_params;

	AssignConstraintsToGridBatchesLoop(const AssignConstraintsToGridBatchesParams& params)
	{
		m_params = &params;
	}
	void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
	{
		assignConstraintsToGridBatches(*m_params, iBegin, iEnd);
	}
};

//
// setupSpatialGridBatchesMt -- generate batches using a uniform 3D grid
//
/*

Bodies are treated as 3D points at their center of mass. We only consider dynamic bodies at this stage,
because only dynamic bodies are mutated when a constraint is solved, and thus only they are subject to race conditions.

1. Compute a bounding box around all dynamic bodies
2. Compute the maximum extent of all dynamic constraints. Each dynamic constraint is treated as a line segment, and we need the size of
   box that will fully enclose any single dynamic constraint

3. Establish the cell size of our grid. The cell size in each dimension must be at least as large as the dynamic constraints' max extent,
   so that no dynamic constraint can span more than 2 cells of the grid on any axis. The cell size should be adjusted
   larger as needed to keep the total number of cells from being excessively high

Key idea: Given that each constraint spans 1 or 2 grid cells in each dimension, we can handle all constraints by processing
          in chunks of 2x2x2 cells with 8 different 1-cell offsets ((0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,0,0)...).
          For each of the 8 offsets we create a phase, and each 2x2x2 chunk that contains dynamic constraints becomes a batch in that phase.

4. Once the grid is established, we can calculate for each constraint which phase and batch it belongs in.

5. Merge small batches within each phase separately, to even out the sizes of batches

Optionally, we can "collapse" one dimension of our 3D grid to turn it into a 2D grid, which reduces the number of phases
to 4. With fewer phases, there are more constraints per phase and this makes it easier to create batches of a useful size.
*/
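//
// Illustrative sketch (hypothetical values) of the chunk/phase indexing described above,
// mirroring assignConstraintsToGridBatches():
//
//   int gridCoord[3] = { 4, 2, 7 };   // min grid-cell coords of the constraint's bodies
//   int iPhase = iCon & phaseMask;    // pseudorandom phase; per-axis bits are then forced
//                                     // wherever the constraint straddles 2 cells
//   int chunkCoord[3];
//   for (int i = 0; i < 3; ++i)
//     chunkCoord[i] = (gridCoord[i] - ((iPhase >> i) & 1)) / 2;  // 2x2x2 cells per chunk
//   int iBatch = iPhase * maxNumBatchesPerPhase
//              + chunkCoord[0]
//              + chunkCoord[1] * gridChunkDim[0]
//              + chunkCoord[2] * gridChunkDim[0] * gridChunkDim[1];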
//
static void setupSpatialGridBatchesMt(
	btBatchedConstraints* batchedConstraints,
	btAlignedObjectArray<char>* scratchMemory,
	btConstraintArray* constraints,
	const btAlignedObjectArray<btSolverBody>& bodies,
	int minBatchSize,
	int maxBatchSize,
	bool use2DGrid)
{
	BT_PROFILE("setupSpatialGridBatchesMt");
	const int numPhases = 8;
	int numConstraints = constraints->size();
	int numConstraintRows = constraints->size();

	const int maxGridChunkCount = 128;
	int allocNumBatchesPerPhase = maxGridChunkCount;
	int allocNumBatches = allocNumBatchesPerPhase * numPhases;

	btVector3* bodyPositions = NULL;
	bool* bodyDynamicFlags = NULL;
	btIntVec3* bodyGridCoords = NULL;
	btBatchInfo* batches = NULL;
	int* batchWork = NULL;
	btBatchedConstraintInfo* conInfos = NULL;
	int* constraintBatchIds = NULL;
	int* constraintRowBatchIds = NULL;
	{
		PreallocatedMemoryHelper<10> memHelper;
		memHelper.addChunk((void**)&bodyPositions, sizeof(btVector3) * bodies.size());
		memHelper.addChunk((void**)&bodyDynamicFlags, sizeof(bool) * bodies.size());
		memHelper.addChunk((void**)&bodyGridCoords, sizeof(btIntVec3) * bodies.size());
		memHelper.addChunk((void**)&batches, sizeof(btBatchInfo) * allocNumBatches);
		memHelper.addChunk((void**)&batchWork, sizeof(int) * allocNumBatches);
		memHelper.addChunk((void**)&conInfos, sizeof(btBatchedConstraintInfo) * numConstraints);
		memHelper.addChunk((void**)&constraintBatchIds, sizeof(int) * numConstraints);
		memHelper.addChunk((void**)&constraintRowBatchIds, sizeof(int) * numConstraintRows);
		size_t scratchSize = memHelper.getSizeToAllocate();
		// if we need to reallocate
		if (scratchMemory->capacity() < scratchSize)
		{
			// allocate 6.25% extra to avoid repeated reallocs
			scratchMemory->reserve(scratchSize + scratchSize / 16);
		}
		scratchMemory->resizeNoInitialize(scratchSize);
		char* memPtr = &scratchMemory->at(0);
		memHelper.setChunkPointers(memPtr);
	}

	numConstraints = initBatchedConstraintInfo(conInfos, constraints);

	// compute bounding box around all dynamic bodies
	// (could be done in parallel)
	btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT);
	btVector3 bboxMax = -bboxMin;
	//int dynamicBodyCount = 0;
	for (int i = 0; i < bodies.size(); ++i)
	{
		const btSolverBody& body = bodies[i];
		btVector3 bodyPos = body.getWorldTransform().getOrigin();
		bool isDynamic = (body.internalGetInvMass().x() > btScalar(0));
		bodyPositions[i] = bodyPos;
		bodyDynamicFlags[i] = isDynamic;
		if (isDynamic)
		{
			//dynamicBodyCount++;
			bboxMin.setMin(bodyPos);
			bboxMax.setMax(bodyPos);
		}
	}

	// find max extent of all dynamic constraints
	// (could be done in parallel)
	btVector3 consExtent = findMaxDynamicConstraintExtent(bodyPositions, bodyDynamicFlags, conInfos, numConstraints, bodies.size());

	btVector3 gridExtent = bboxMax - bboxMin;

	gridExtent.setMax(btVector3(btScalar(1), btScalar(1), btScalar(1)));

	btVector3 gridCellSize = consExtent;
	int gridDim[3];
	gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x());
	gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y());
	gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z());

	// if we can collapse an axis, it will cut our number of phases in half which could be more efficient
	int phaseMask = 7;
	bool collapseAxis = use2DGrid;
	if (collapseAxis)
	{
		// pick the smallest axis to collapse, leaving us with the greatest number of cells in our grid
		int iAxisToCollapse = 0;
		int axisDim = gridDim[iAxisToCollapse];
		//for each dimension
		for (int i = 0; i < 3; ++i)
		{
			if (gridDim[i] < axisDim)
			{
				iAxisToCollapse = i;
				axisDim = gridDim[i];
			}
		}
		// collapse it
		gridCellSize[iAxisToCollapse] = gridExtent[iAxisToCollapse] * 2.0f;
		phaseMask &= ~(1 << iAxisToCollapse);
	}

	int numGridChunks = 0;
	btIntVec3 gridChunkDim;  // each chunk is 2x2x2 group of cells
	while (true)
	{
		gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x());
		gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y());
		gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z());
		gridChunkDim[0] = btMax(1, (gridDim[0] + 0) / 2);
		gridChunkDim[1] = btMax(1, (gridDim[1] + 0) / 2);
		gridChunkDim[2] = btMax(1, (gridDim[2] + 0) / 2);
		numGridChunks = gridChunkDim[0] * gridChunkDim[1] * gridChunkDim[2];
		float nChunks = float(gridChunkDim[0]) * float(gridChunkDim[1]) * float(gridChunkDim[2]);  // the integer product above is susceptible to overflow, so double-check using floats
		if (numGridChunks <= maxGridChunkCount && nChunks <= maxGridChunkCount)
		{
			break;
		}
		gridCellSize *= 1.25;  // 1.25^3 ~= 1.95, so each iteration roughly cuts numCells in half
	}
	btAssert(numGridChunks <= maxGridChunkCount);
	int maxNumBatchesPerPhase = numGridChunks;

	// for each dynamic body, compute grid coords
	btVector3 invGridCellSize = btVector3(1, 1, 1) / gridCellSize;
	// (can be done in parallel)
	for (int iBody = 0; iBody < bodies.size(); ++iBody)
	{
		btIntVec3& coords = bodyGridCoords[iBody];
		if (bodyDynamicFlags[iBody])
		{
			btVector3 v = (bodyPositions[iBody] - bboxMin) * invGridCellSize;
			coords.m_ints[0] = int(v.x());
			coords.m_ints[1] = int(v.y());
			coords.m_ints[2] = int(v.z());
			btAssert(coords.m_ints[0] >= 0 && coords.m_ints[0] < gridDim[0]);
			btAssert(coords.m_ints[1] >= 0 && coords.m_ints[1] < gridDim[1]);
			btAssert(coords.m_ints[2] >= 0 && coords.m_ints[2] < gridDim[2]);
		}
		else
		{
			coords.m_ints[0] = -1;
			coords.m_ints[1] = -1;
			coords.m_ints[2] = -1;
		}
	}

	for (int iPhase = 0; iPhase < numPhases; ++iPhase)
	{
		int batchBegin = iPhase * maxNumBatchesPerPhase;
		int batchEnd = batchBegin + maxNumBatchesPerPhase;
		for (int iBatch = batchBegin; iBatch < batchEnd; ++iBatch)
		{
			btBatchInfo& batch = batches[iBatch];
			batch = btBatchInfo();
		}
	}

	{
		AssignConstraintsToGridBatchesParams params;
		params.bodyDynamicFlags = bodyDynamicFlags;
		params.bodyGridCoords = bodyGridCoords;
		params.numBodies = bodies.size();
		params.conInfos = conInfos;
		params.constraintBatchIds = constraintBatchIds;
		params.gridChunkDim = gridChunkDim;
		params.maxNumBatchesPerPhase = maxNumBatchesPerPhase;
		params.numPhases = numPhases;
		params.phaseMask = phaseMask;
		bool inParallel = true;  // debugging toggle: set to false to use the single-threaded path below
		if (inParallel)
		{
			AssignConstraintsToGridBatchesLoop loop(params);
			int grainSize = 250;
			btParallelFor(0, numConstraints, grainSize, loop);
		}
		else
		{
			assignConstraintsToGridBatches(params, 0, numConstraints);
		}
	}
	for (int iCon = 0; iCon < numConstraints; ++iCon)
	{
		const btBatchedConstraintInfo& con = conInfos[iCon];
		int iBatch = constraintBatchIds[iCon];
		btBatchInfo& batch = batches[iBatch];
		batch.numConstraints += con.numConstraintRows;
	}

	for (int iPhase = 0; iPhase < numPhases; ++iPhase)
	{
		// if phase is legit,
		if (iPhase == (iPhase & phaseMask))
		{
			int iBeginBatch = iPhase * maxNumBatchesPerPhase;
			int iEndBatch = iBeginBatch + maxNumBatchesPerPhase;
			mergeSmallBatches(batches, iBeginBatch, iEndBatch, minBatchSize, maxBatchSize);
		}
	}
	// all constraints have been assigned a batchId
	updateConstraintBatchIdsForMergesMt(constraintBatchIds, numConstraints, batches, maxNumBatchesPerPhase * numPhases);

	if (numConstraintRows > numConstraints)
	{
		expandConstraintRowsMt(&constraintRowBatchIds[0], &constraintBatchIds[0], &conInfos[0], numConstraints, numConstraintRows);
	}
	else
	{
		constraintRowBatchIds = constraintBatchIds;
	}

	writeOutBatches(batchedConstraints, constraintRowBatchIds, numConstraintRows, batches, batchWork, maxNumBatchesPerPhase, numPhases);
	btAssert(batchedConstraints->validate(constraints, bodies));
}

static void setupSingleBatch(
	btBatchedConstraints* bc,
	int numConstraints)
{
	BT_PROFILE("setupSingleBatch");
	typedef btBatchedConstraints::Range Range;

	bc->m_constraintIndices.resize(numConstraints);
	for (int i = 0; i < numConstraints; ++i)
	{
		bc->m_constraintIndices[i] = i;
	}

	bc->m_batches.resizeNoInitialize(0);
	bc->m_phases.resizeNoInitialize(0);
	bc->m_phaseOrder.resizeNoInitialize(0);
	bc->m_phaseGrainSize.resizeNoInitialize(0);

	if (numConstraints > 0)
	{
		bc->m_batches.push_back(Range(0, numConstraints));
		bc->m_phases.push_back(Range(0, 1));
		bc->m_phaseOrder.push_back(0);
		bc->m_phaseGrainSize.push_back(1);
	}
}

void btBatchedConstraints::setup(
	btConstraintArray* constraints,
	const btAlignedObjectArray<btSolverBody>& bodies,
	BatchingMethod batchingMethod,
	int minBatchSize,
	int maxBatchSize,
	btAlignedObjectArray<char>* scratchMemory)
{
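	// batching only pays off when there are enough constraints to form several batches;
	// below roughly 4 * minBatchSize constraints, fall back to a single batch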
	if (constraints->size() >= minBatchSize * 4)
	{
		bool use2DGrid = batchingMethod == BATCHING_METHOD_SPATIAL_GRID_2D;
		setupSpatialGridBatchesMt(this, scratchMemory, constraints, bodies, minBatchSize, maxBatchSize, use2DGrid);
		if (s_debugDrawBatches)
		{
			debugDrawAllBatches(this, constraints, bodies);
		}
	}
	else
	{
		setupSingleBatch(this, constraints->size());
	}
}