Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check download status before overwriting a block #1499

Merged
merged 3 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Flush shall only sync the blocks to storage and not delete them from local cache.
- Random write has been re-enabled in block cache.
- Writing to an uncommitted block which has been deleted from the in-memory cache.
- Check the download status of a block before updating it, and return an error if the download failed.

## 2.3.1 (Unreleased)
**NOTICE**
Expand Down
28 changes: 20 additions & 8 deletions component/block_cache/block_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -610,10 +610,7 @@ func (bc *BlockCache) getBlock(handle *handlemap.Handle, readoffset uint64) (*Bl
log.Err("BlockCache::getBlock : Failed to download block %v=>%s (offset %v, index %v)", handle.ID, handle.Path, readoffset, index)

// Remove this node from handle so that next read retries to download the block again
_ = handle.Buffers.Cooking.Remove(block.node)
handle.RemoveValue(fmt.Sprintf("%v", block.id))
block.ReUse()
bc.blockPool.Release(block)
bc.releaseFailedBlock(handle, block)
return nil, fmt.Errorf("failed to download block")
}

Expand Down Expand Up @@ -1036,10 +1033,7 @@ func (bc *BlockCache) getOrCreateBlock(handle *handlemap.Handle, offset uint64)
log.Err("BlockCache::getOrCreateBlock : Failed to download block %v for %v=>%s", block.id, handle.ID, handle.Path)

// Remove this node from handle so that next read retries to download the block again
_ = handle.Buffers.Cooking.Remove(block.node)
handle.RemoveValue(fmt.Sprintf("%v", block.id))
block.ReUse()
bc.blockPool.Release(block)
bc.releaseFailedBlock(handle, block)
return nil, fmt.Errorf("failed to download block")
}
} else {
Expand Down Expand Up @@ -1078,6 +1072,15 @@ func (bc *BlockCache) getOrCreateBlock(handle *handlemap.Handle, offset uint64)
log.Debug("BlockCache::getOrCreateBlock : Waiting for download to finish for committed block %v for %v=>%s", block.id, handle.ID, handle.Path)
<-block.state
block.Unblock()

// if the block failed to download, it can't be used for overwriting
if block.IsFailed() {
log.Err("BlockCache::getOrCreateBlock : Failed to download block %v for %v=>%s", block.id, handle.ID, handle.Path)

// Remove this node from handle so that next read retries to download the block again
bc.releaseFailedBlock(handle, block)
return nil, fmt.Errorf("failed to download block")
}
} else if block.flags.IsSet(BlockFlagUploading) {
// If the block is being staged, then wait till it is uploaded,
// and then write to the same block and move it back to cooking queue
Expand All @@ -1090,6 +1093,7 @@ func (bc *BlockCache) getOrCreateBlock(handle *handlemap.Handle, offset uint64)
}
block.node = handle.Buffers.Cooking.PushBack(block)
}

block.flags.Clear(BlockFlagUploading)
block.flags.Clear(BlockFlagDownloading)
block.flags.Clear(BlockFlagSynced)
Expand Down Expand Up @@ -1123,6 +1127,14 @@ func (bc *BlockCache) stageBlocks(handle *handlemap.Handle, cnt int) error {
return nil
}

// releaseFailedBlock detaches a block whose download failed from the given
// handle and returns it to the block pool so that it can be used again.
//
// It removes block.node from the handle's Cooking list, removes the handle
// value keyed by the block id, calls block.ReUse and finally releases the
// block to bc.blockPool.
// NOTE(review): assumes block.node is non-nil and currently linked in the
// Cooking list — confirm against every caller.
func (bc *BlockCache) releaseFailedBlock(handle *handlemap.Handle, block *Block) {
	// The value returned by Remove is not needed here.
	_ = handle.Buffers.Cooking.Remove(block.node)

	// Handle values for blocks are keyed by the block id rendered as a string.
	blockKey := fmt.Sprintf("%v", block.id)
	handle.RemoveValue(blockKey)

	// Prepare the block for reuse before handing it back to the pool.
	block.ReUse()
	bc.blockPool.Release(block)
}

func (bc *BlockCache) printCooking(handle *handlemap.Handle) { //nolint
nodeList := handle.Buffers.Cooking
node := nodeList.Front()
Expand Down
57 changes: 56 additions & 1 deletion component/block_cache/block_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2255,13 +2255,68 @@ func (suite *blockCacheTestSuite) TestBlockFailOverwrite() {
size: _1MB,
}

// write offset 0 where block 0 download will fail
// write at offset 0 where block 0 download will fail
n, err := tobj.blockCache.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 0, Data: dataBuff[:1*_1MB]})
suite.assert.NotNil(err)
suite.assert.Contains(err.Error(), "failed to download block")
suite.assert.Equal(n, 0)
suite.assert.False(h.Dirty())

err = tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h})
suite.assert.Nil(err)

fs, err := os.Stat(storagePath)
suite.assert.Nil(err)
suite.assert.Equal(fs.Size(), int64(0))
}

func (suite *blockCacheTestSuite) TestBlockDownloadFailed() {
cfg := "block_cache:\n block-size-mb: 1\n mem-size-mb: 20\n prefetch: 12\n parallelism: 10"
tobj, err := setupPipeline(cfg)
defer tobj.cleanupPipeline()

suite.assert.Nil(err)
suite.assert.NotNil(tobj.blockCache)

path := getTestFileName(suite.T().Name())
storagePath := filepath.Join(tobj.fake_storage_path, path)

// write using block cache
options := internal.CreateFileOptions{Name: path, Mode: 0777}
h, err := tobj.blockCache.CreateFile(options)
suite.assert.Nil(err)
suite.assert.NotNil(h)
suite.assert.Equal(h.Size, int64(0))
suite.assert.False(h.Dirty())

h, err = tobj.blockCache.OpenFile(internal.OpenFileOptions{Name: path, Flags: os.O_RDWR})
suite.assert.Nil(err)
suite.assert.NotNil(h)
suite.assert.Equal(h.Size, int64(0))
suite.assert.False(h.Dirty())

// updating the size to replicate the download failure
h.Size = int64(4 * _1MB)

data := make([]byte, _1MB)
n, err := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: 0, Data: data})
suite.assert.NotNil(err)
suite.assert.Contains(err.Error(), "failed to download block")
suite.assert.Equal(n, 0)

// 1-4MB data being prefetched in blocks 1-3
suite.assert.Equal(h.Buffers.Cooking.Len(), 3)

// write at offset 1MB where block 1 download will fail
n, err = tobj.blockCache.WriteFile(internal.WriteFileOptions{Handle: h, Offset: int64(_1MB), Data: dataBuff[:1*_1MB]})
suite.assert.NotNil(err)
suite.assert.Contains(err.Error(), "failed to download block")
suite.assert.Equal(n, 0)
suite.assert.False(h.Dirty())

err = tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h})
suite.assert.Nil(err)

fs, err := os.Stat(storagePath)
suite.assert.Nil(err)
suite.assert.Equal(fs.Size(), int64(0))
Expand Down