From c19f8fba5c2bcc787c2dfa23d87a9217288173b1 Mon Sep 17 00:00:00 2001 From: tiancaiamao Date: Tue, 9 Oct 2018 16:31:37 +0800 Subject: [PATCH] store/tikv,executor: redesign the latch scheduler (#7711) Check maxCommitTS on each key, instead of each slot, so hash collision will not lead to transaction retry. --- executor/write_test.go | 3 +- store/tikv/latch/latch.go | 206 ++++++++++++++++++----------- store/tikv/latch/latch_test.go | 57 +++++++- store/tikv/latch/scheduler.go | 38 ++++-- store/tikv/latch/scheduler_test.go | 2 +- store/tikv/txn.go | 3 - 6 files changed, 211 insertions(+), 98 deletions(-) diff --git a/executor/write_test.go b/executor/write_test.go index eb444a305b7a1..033798c754a82 100644 --- a/executor/write_test.go +++ b/executor/write_test.go @@ -1815,8 +1815,7 @@ func (s *testBypassSuite) TestBypassLatch(c *C) { // txn1 and txn2 data range do not overlap, but using latches result in txn conflict. fn() - _, err = tk1.Exec("commit") - c.Assert(err, NotNil) + tk1.MustExec("commit") tk1.MustExec("truncate table t") fn() diff --git a/store/tikv/latch/latch.go b/store/tikv/latch/latch.go index a8ca718947490..f6f57a460b106 100644 --- a/store/tikv/latch/latch.go +++ b/store/tikv/latch/latch.go @@ -14,6 +14,7 @@ package latch import ( + "bytes" "math/bits" "sort" "sync" @@ -22,32 +23,26 @@ import ( "github.com/spaolacci/murmur3" ) -// latch stores a key's waiting transactions information. -type latch struct { - // Whether there is any transaction in waitingQueue except head. - hasMoreWaiting bool - // The startTS of the transaction which is the head of waiting transactions. - waitingQueueHead uint64 - maxCommitTS uint64 - sync.Mutex -} +type node struct { + slotID int + key []byte + maxCommitTS uint64 + value *Lock -func (l *latch) isEmpty() bool { - return l.waitingQueueHead == 0 && !l.hasMoreWaiting + next *node } -func (l *latch) free() { - l.waitingQueueHead = 0 -} - -func (l *latch) refreshCommitTS(commitTS uint64) { - l.Lock() - defer l.Unlock() - l.maxCommitTS = mathutil.MaxUint64(commitTS, l.maxCommitTS) +// latch stores a key's waiting transactions information. +type latch struct { + queue *node + count int + waiting []*Lock + sync.Mutex } // Lock is the locks' information required for a transaction. type Lock struct { + keys [][]byte // The slot IDs of the latches(keys) that a startTS must acquire before being able to processed. requiredSlots []int // The number of latches that the transaction has acquired. For status is stale, it include the @@ -96,9 +91,20 @@ func (l *Lock) SetCommitTS(commitTS uint64) { // but conceptually a latch is a queue, and a slot is an index to the queue type Latches struct { slots []latch - // The waiting queue for each slot(slotID => slice of Lock). - waitingQueues map[int][]*Lock - sync.RWMutex +} + +type bytesSlice [][]byte + +func (s bytesSlice) Len() int { + return len(s) +} + +func (s bytesSlice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s bytesSlice) Less(i, j int) bool { + return bytes.Compare(s[i], s[j]) < 0 } // NewLatches create a Latches with fixed length, @@ -107,14 +113,15 @@ func NewLatches(size uint) *Latches { powerOfTwoSize := 1 << uint32(bits.Len32(uint32(size-1))) slots := make([]latch, powerOfTwoSize) return &Latches{ - slots: slots, - waitingQueues: make(map[int][]*Lock), + slots: slots, } } // genLock generates Lock for the transaction with startTS and keys. func (latches *Latches) genLock(startTS uint64, keys [][]byte) *Lock { + sort.Sort(bytesSlice(keys)) return &Lock{ + keys: keys, requiredSlots: latches.genSlotIDs(keys), acquiredCount: 0, startTS: startTS, @@ -126,17 +133,7 @@ func (latches *Latches) genSlotIDs(keys [][]byte) []int { for _, key := range keys { slots = append(slots, latches.slotID(key)) } - sort.Ints(slots) - if len(slots) <= 1 { - return slots - } - dedup := slots[:1] - for i := 1; i < len(slots); i++ { - if slots[i] != slots[i-1] { - dedup = append(dedup, slots[i]) - } - } - return dedup + return slots } // slotID return slotID for current key. @@ -150,8 +147,7 @@ func (latches *Latches) acquire(lock *Lock) acquireResult { return acquireStale } for lock.acquiredCount < len(lock.requiredSlots) { - slotID := lock.requiredSlots[lock.acquiredCount] - status := latches.acquireSlot(slotID, lock) + status := latches.acquireSlot(lock) if status != acquireSuccess { return status } @@ -161,75 +157,129 @@ func (latches *Latches) acquire(lock *Lock) acquireResult { // release releases all latches owned by the `lock` and returns the wakeup list. // Preconditions: the caller must ensure the transaction's status is not locked. -func (latches *Latches) release(lock *Lock, commitTS uint64, wakeupList []*Lock) []*Lock { +func (latches *Latches) release(lock *Lock, wakeupList []*Lock) []*Lock { wakeupList = wakeupList[:0] - for i := 0; i < lock.acquiredCount; i++ { - slotID := lock.requiredSlots[i] - if nextLock := latches.releaseSlot(slotID, commitTS); nextLock != nil { + for lock.acquiredCount > 0 { + if nextLock := latches.releaseSlot(lock); nextLock != nil { wakeupList = append(wakeupList, nextLock) } } return wakeupList } -// refreshCommitTS refreshes commitTS for keys. -func (latches *Latches) refreshCommitTS(keys [][]byte, commitTS uint64) { - slotIDs := latches.genSlotIDs(keys) - for _, slotID := range slotIDs { - latches.slots[slotID].refreshCommitTS(commitTS) - } -} - -func (latches *Latches) releaseSlot(slotID int, commitTS uint64) (nextLock *Lock) { +func (latches *Latches) releaseSlot(lock *Lock) (nextLock *Lock) { + key := lock.keys[lock.acquiredCount-1] + slotID := lock.requiredSlots[lock.acquiredCount-1] latch := &latches.slots[slotID] + lock.acquiredCount-- latch.Lock() defer latch.Unlock() - latch.maxCommitTS = mathutil.MaxUint64(latch.maxCommitTS, commitTS) - if !latch.hasMoreWaiting { - latch.free() + + find := findNode(latch.queue, key) + if find.value != lock { + panic("releaseSlot wrong") + } + find.maxCommitTS = mathutil.MaxUint64(find.maxCommitTS, lock.commitTS) + find.value = nil + if len(latch.waiting) == 0 { return nil } - nextLock, latch.hasMoreWaiting = latches.popFromWaitingQueue(slotID) - latch.waitingQueueHead = nextLock.startTS - nextLock.acquiredCount++ - if latch.maxCommitTS > nextLock.startTS { - nextLock.isStale = true + + var idx int + for idx = 0; idx < len(latch.waiting); idx++ { + waiting := latch.waiting[idx] + if bytes.Compare(waiting.keys[waiting.acquiredCount], key) == 0 { + break + } } - return nextLock -} + // Wake up the first one in waiting queue. + if idx < len(latch.waiting) { + nextLock = latch.waiting[idx] + // Delete element latch.waiting[idx] from the array. + copy(latch.waiting[idx:], latch.waiting[idx+1:]) + latch.waiting[len(latch.waiting)-1] = nil + latch.waiting = latch.waiting[:len(latch.waiting)-1] -func (latches *Latches) popFromWaitingQueue(slotID int) (front *Lock, hasMoreWaiting bool) { - latches.Lock() - defer latches.Unlock() - waiting := latches.waitingQueues[slotID] - front = waiting[0] - if len(waiting) == 1 { - delete(latches.waitingQueues, slotID) - } else { - latches.waitingQueues[slotID] = waiting[1:] - hasMoreWaiting = true + if find.maxCommitTS > nextLock.startTS { + nextLock.isStale = true + } } + return } -func (latches *Latches) acquireSlot(slotID int, lock *Lock) acquireResult { +func (latches *Latches) acquireSlot(lock *Lock) acquireResult { + key := lock.keys[lock.acquiredCount] + slotID := lock.requiredSlots[lock.acquiredCount] latch := &latches.slots[slotID] latch.Lock() defer latch.Unlock() - if latch.maxCommitTS > lock.startTS { + + // Try to recycle to limit the memory usage. + if latch.count >= latchListCount { + latch.recycle(lock.startTS) + } + + find := findNode(latch.queue, key) + if find == nil { + tmp := &node{ + slotID: slotID, + key: key, + value: lock, + } + tmp.next = latch.queue + latch.queue = tmp + latch.count++ + + lock.acquiredCount++ + return acquireSuccess + } + + if find.maxCommitTS > lock.startTS { lock.isStale = true return acquireStale } - if latch.isEmpty() { - latch.waitingQueueHead = lock.startTS + if find.value == nil { + find.value = lock lock.acquiredCount++ return acquireSuccess } + // Push the current transaction into waitingQueue. - latch.hasMoreWaiting = true - latches.Lock() - defer latches.Unlock() - latches.waitingQueues[slotID] = append(latches.waitingQueues[slotID], lock) + latch.waiting = append(latch.waiting, lock) return acquireLocked } + +// recycle is not thread safe, the latch should acquire its lock before executing this function. +func (l *latch) recycle(currentTS uint64) { + fakeHead := node{next: l.queue} + prev := &fakeHead + for curr := prev.next; curr != nil; curr = curr.next { + if tsoSub(currentTS, curr.maxCommitTS) >= expireDuration && curr.value == nil { + l.count-- + prev.next = curr.next + } else { + prev = curr + } + } + l.queue = fakeHead.next +} + +func (latches *Latches) recycle(currentTS uint64) { + for i := 0; i < len(latches.slots); i++ { + latch := &latches.slots[i] + latch.Lock() + latch.recycle(currentTS) + latch.Unlock() + } +} + +func findNode(list *node, key []byte) *node { + for n := list; n != nil; n = n.next { + if bytes.Compare(n.key, key) == 0 { + return n + } + } + return nil +} diff --git a/store/tikv/latch/latch_test.go b/store/tikv/latch/latch_test.go index 17178dd5c7850..951a9e3de1802 100644 --- a/store/tikv/latch/latch_test.go +++ b/store/tikv/latch/latch_test.go @@ -16,8 +16,10 @@ package latch import ( "sync/atomic" "testing" + "time" . "github.com/pingcap/check" + "github.com/pingcap/tidb/store/tikv/oracle" ) func TestT(t *testing.T) { @@ -48,7 +50,7 @@ func getTso() uint64 { func (s *testLatchSuite) TestWakeUp(c *C) { keysA := [][]byte{ - []byte("a"), []byte("b"), []byte("c"), []byte("c")} + []byte("a"), []byte("b"), []byte("c")} _, lockA := s.newLock(keysA) keysB := [][]byte{[]byte("d"), []byte("e"), []byte("a"), []byte("c")} @@ -65,7 +67,8 @@ func (s *testLatchSuite) TestWakeUp(c *C) { // A release lock, and get wakeup list. commitTSA := getTso() wakeupList := make([]*Lock, 0) - wakeupList = s.latches.release(lockA, commitTSA, wakeupList) + lockA.SetCommitTS(commitTSA) + wakeupList = s.latches.release(lockA, wakeupList) c.Assert(wakeupList[0].startTS, Equals, startTSB) // B acquire failed since startTSB has stale for some keys. @@ -73,7 +76,7 @@ func (s *testLatchSuite) TestWakeUp(c *C) { c.Assert(result, Equals, acquireStale) // B release lock since it received a stale. - wakeupList = s.latches.release(lockB, 0, wakeupList) + wakeupList = s.latches.release(lockB, wakeupList) c.Assert(wakeupList, HasLen, 0) // B restart:get a new startTS. @@ -85,7 +88,7 @@ func (s *testLatchSuite) TestWakeUp(c *C) { func (s *testLatchSuite) TestFirstAcquireFailedWithStale(c *C) { keys := [][]byte{ - []byte("a"), []byte("b"), []byte("c"), []byte("c")} + []byte("a"), []byte("b"), []byte("c")} _, lockA := s.newLock(keys) startTSB, lockB := s.newLock(keys) // acquire lockA success @@ -94,11 +97,53 @@ func (s *testLatchSuite) TestFirstAcquireFailedWithStale(c *C) { // release lockA commitTSA := getTso() wakeupList := make([]*Lock, 0) - s.latches.release(lockA, commitTSA, wakeupList) + lockA.SetCommitTS(commitTSA) + s.latches.release(lockA, wakeupList) c.Assert(commitTSA, Greater, startTSB) // acquire lockB first time, should be failed with stale since commitTSA > startTSB result = s.latches.acquire(lockB) c.Assert(result, Equals, acquireStale) - s.latches.release(lockB, 0, wakeupList) + s.latches.release(lockB, wakeupList) +} + +func (s *testLatchSuite) TestRecycle(c *C) { + latches := NewLatches(8) + now := time.Now() + startTS := oracle.ComposeTS(oracle.GetPhysical(now), 0) + lock := latches.genLock(startTS, [][]byte{ + []byte("a"), []byte("b"), + }) + lock1 := latches.genLock(startTS, [][]byte{ + []byte("b"), []byte("c"), + }) + c.Assert(latches.acquire(lock), Equals, acquireSuccess) + c.Assert(latches.acquire(lock1), Equals, acquireLocked) + lock.SetCommitTS(startTS + 1) + var wakeupList []*Lock + latches.release(lock, wakeupList) + + lock2 := latches.genLock(startTS+3, [][]byte{ + []byte("b"), []byte("c"), + }) + c.Assert(latches.acquire(lock2), Equals, acquireSuccess) + wakeupList = wakeupList[:0] + latches.release(lock2, wakeupList) + + allEmpty := true + for i := 0; i < len(latches.slots); i++ { + latch := &latches.slots[i] + if latch.queue != nil { + allEmpty = false + } + } + c.Assert(allEmpty, IsFalse) + + currentTS := oracle.ComposeTS(oracle.GetPhysical(now.Add(expireDuration)), 3) + latches.recycle(currentTS) + + for i := 0; i < len(latches.slots); i++ { + latch := &latches.slots[i] + c.Assert(latch.queue, IsNil) + } } diff --git a/store/tikv/latch/scheduler.go b/store/tikv/latch/scheduler.go index f3ffad7a77d9f..5f4e84cd7760d 100644 --- a/store/tikv/latch/scheduler.go +++ b/store/tikv/latch/scheduler.go @@ -15,15 +15,19 @@ package latch import ( "sync" + "time" + + "github.com/pingcap/tidb/store/tikv/oracle" ) const lockChanSize = 100 // LatchesScheduler is used to schedule latches for transactions. type LatchesScheduler struct { - latches *Latches - unlockCh chan *Lock - closed bool + latches *Latches + unlockCh chan *Lock + closed bool + lastRecycleTime uint64 sync.RWMutex } @@ -40,13 +44,31 @@ func NewScheduler(size uint) *LatchesScheduler { return scheduler } +// A transaction can last for at most 10 minutes, see also gcworker. +const expireDuration = 10 * time.Minute +const checkInterval = 5 * time.Minute +const checkCounter = 50000 +const latchListCount = 5 + func (scheduler *LatchesScheduler) run() { + var counter int wakeupList := make([]*Lock, 0) for lock := range scheduler.unlockCh { - wakeupList = scheduler.latches.release(lock, lock.commitTS, wakeupList) + wakeupList = scheduler.latches.release(lock, wakeupList) if len(wakeupList) > 0 { scheduler.wakeup(wakeupList) } + + if lock.commitTS > lock.startTS { + currentTS := lock.commitTS + elapsed := tsoSub(currentTS, scheduler.lastRecycleTime) + if elapsed > checkInterval && counter > checkCounter { + go scheduler.latches.recycle(lock.commitTS) + scheduler.lastRecycleTime = currentTS + counter = 0 + } + } + counter++ } } @@ -92,8 +114,8 @@ func (scheduler *LatchesScheduler) UnLock(lock *Lock) { } } -// RefreshCommitTS refreshes commitTS for keys. It could be used for the transaction not retryable, -// which would do 2PC directly and wouldn't get a lock. -func (scheduler *LatchesScheduler) RefreshCommitTS(keys [][]byte, commitTS uint64) { - scheduler.latches.refreshCommitTS(keys, commitTS) +func tsoSub(ts1, ts2 uint64) time.Duration { + t1 := oracle.GetTimeFromTS(ts1) + t2 := oracle.GetTimeFromTS(ts2) + return t1.Sub(t2) } diff --git a/store/tikv/latch/scheduler_test.go b/store/tikv/latch/scheduler_test.go index d57737fb9512d..fc2d5b1514d0f 100644 --- a/store/tikv/latch/scheduler_test.go +++ b/store/tikv/latch/scheduler_test.go @@ -29,7 +29,7 @@ func (s *testSchedulerSuite) SetUpTest(c *C) { func (s *testSchedulerSuite) TestWithConcurrency(c *C) { txns := [][][]byte{ - {[]byte("a"), []byte("a"), []byte("b"), []byte("c")}, + {[]byte("a"), []byte("b"), []byte("c")}, {[]byte("a"), []byte("d"), []byte("e"), []byte("f")}, {[]byte("e"), []byte("f"), []byte("g"), []byte("h")}, } diff --git a/store/tikv/txn.go b/store/tikv/txn.go index b1edef7da5b97..18b7e1a17d31f 100644 --- a/store/tikv/txn.go +++ b/store/tikv/txn.go @@ -199,9 +199,6 @@ func (txn *tikvTxn) Commit(ctx context.Context) error { // When bypassLatch flag is true, commit directly. if bypassLatch { err = committer.executeAndWriteFinishBinlog(ctx) - if err == nil { - txn.store.txnLatches.RefreshCommitTS(committer.keys, committer.commitTS) - } log.Debug("[kv]", connID, " txnLatches enabled while txn not retryable, 2pc directly:", err) return errors.Trace(err) }