diff --git a/core/image/format.go b/core/image/format.go
index 22f8f67eeb..e1dddaeb41 100644
--- a/core/image/format.go
+++ b/core/image/format.go
@@ -64,6 +64,10 @@ var _ = []format{
 	&FmtETC2_RG_U11_NORM{},
 	&FmtETC2_R_S11_NORM{},
 	&FmtETC2_RG_S11_NORM{},
+	&FmtRGTC1_BC4_R_U8_NORM{},
+	&FmtRGTC1_BC4_R_S8_NORM{},
+	&FmtRGTC2_BC5_RG_U8_NORM{},
+	&FmtRGTC2_BC5_RG_S8_NORM{},
 	&FmtS3_DXT1_RGB{},
 	&FmtS3_DXT1_RGBA{},
 	&FmtS3_DXT3_RGBA{},
diff --git a/gapii/cc/vulkan_mid_execution.cpp b/gapii/cc/vulkan_mid_execution.cpp
index 3cad28ae10..f5bf8c8a58 100644
--- a/gapii/cc/vulkan_mid_execution.cpp
+++ b/gapii/cc/vulkan_mid_execution.cpp
@@ -452,38 +452,76 @@ void VulkanSpy::serializeGPUBuffers(StateSerializer *serializer) {
   };
 
   struct pitch {
-    uint32_t height_pitch;
-    uint32_t depth_pitch;
+    size_t row_pitch;
+    size_t depth_pitch;
+    size_t linear_layout_row_pitch;
+    size_t linear_layout_depth_pitch;
     uint32_t texel_width;
     uint32_t texel_height;
     uint32_t element_size;
   };
 
-  // block pitch is calculated with the in-image element size.
-  auto block_pitch = [this, &get_element_size](
-                         const VkExtent3D &extent, uint32_t format,
-                         uint32_t mip_level, uint32_t aspect_bit) -> pitch {
+  auto level_pitch = [this, &get_element_size](
+                         gapil::Ref<ImageObject> img,
+                         uint32_t aspect_bit, uint32_t layer,
+                         uint32_t level) -> pitch {
+    auto &info = img->mInfo;
+    auto &lev = img->mAspects[aspect_bit]->mLayers[layer]->mLevels[level];
+    const bool has_linear_layout =
+        (lev->mLinearLayout != nullptr) && (lev->mLinearLayout->msize != 0);
     auto elementAndTexelBlockSize =
-        subGetElementAndTexelBlockSize(nullptr, nullptr, format);
+        subGetElementAndTexelBlockSize(nullptr, nullptr, info.mFormat);
     const uint32_t texel_width =
        elementAndTexelBlockSize.mTexelBlockSize.mWidth;
     const uint32_t texel_height =
        elementAndTexelBlockSize.mTexelBlockSize.mHeight;
     const uint32_t width =
-        subGetMipSize(nullptr, nullptr, extent.mWidth, mip_level);
+        subGetMipSize(nullptr, nullptr, info.mExtent.mWidth, level);
     const uint32_t height =
-        subGetMipSize(nullptr, nullptr, extent.mHeight, mip_level);
+        subGetMipSize(nullptr, nullptr, info.mExtent.mHeight, level);
     const uint32_t width_in_blocks =
         subRoundUpTo(nullptr, nullptr, width, texel_width);
     const uint32_t height_in_blocks =
         subRoundUpTo(nullptr, nullptr, height, texel_height);
+    const uint32_t element_size =
+        get_element_size(info.mFormat, aspect_bit, false);
+    const size_t row_pitch = width_in_blocks * element_size;
+    const size_t depth_pitch =
+        width_in_blocks * height_in_blocks * element_size;
+    pitch p{row_pitch, depth_pitch, 0, 0,
+            texel_width, texel_height, element_size};
+    if (has_linear_layout) {
+      if (lev->mLinearLayout->mdepthPitch != 0) {
+        p.linear_layout_depth_pitch = lev->mLinearLayout->mdepthPitch;
+      }
+      if (lev->mLinearLayout->mrowPitch != 0) {
+        p.linear_layout_row_pitch = lev->mLinearLayout->mrowPitch;
+      }
+    }
+    return p;
+  };
+
+  // extent pitch is calculated with the in-image element size.
+  auto extent_pitch = [this, &get_element_size](
+                          const VkExtent3D &extent, uint32_t format,
+                          uint32_t aspect_bit) -> pitch {
+    auto elementAndTexelBlockSize =
+        subGetElementAndTexelBlockSize(nullptr, nullptr, format);
+    const uint32_t texel_width =
+        elementAndTexelBlockSize.mTexelBlockSize.mWidth;
+    const uint32_t texel_height =
+        elementAndTexelBlockSize.mTexelBlockSize.mHeight;
+
+    const uint32_t width_in_blocks =
+        subRoundUpTo(nullptr, nullptr, extent.mWidth, texel_width);
+    const uint32_t height_in_blocks =
+        subRoundUpTo(nullptr, nullptr, extent.mHeight, texel_height);
     const uint32_t element_size = get_element_size(format, aspect_bit, false);
-    const size_t size = width_in_blocks * height_in_blocks * element_size;
 
     return pitch{
         uint32_t(width_in_blocks * element_size),
-        uint32_t(size),
+        uint32_t(width_in_blocks * height_in_blocks * element_size),
+        0,
+        0,
         uint32_t(elementAndTexelBlockSize.mTexelBlockSize.mWidth),
         uint32_t(elementAndTexelBlockSize.mTexelBlockSize.mHeight),
         uint32_t(element_size),
@@ -552,8 +590,12 @@ void VulkanSpy::serializeGPUBuffers(StateSerializer *serializer) {
             img->mAspects[aspect]->mLayers[layer]->mLevels[level];
         level_sizes[img_level.get()] = level_size(
             img->mInfo.mExtent, img->mInfo.mFormat, level, aspect);
-        serializer->encodeBuffer(level_sizes[img_level.get()].level_size,
-                                 &img_level->mData, nullptr);
+        uint64_t pool_size = level_sizes[img_level.get()].level_size;
+        if (img_level->mLinearLayout != nullptr &&
+            img_level->mLinearLayout->msize > pool_size) {
+          pool_size = img_level->mLinearLayout->msize;
+        }
+        serializer->encodeBuffer(pool_size, &img_level->mData, nullptr);
       });
 
   if (img->mIsSwapchainImage) {
@@ -777,29 +819,33 @@ void VulkanSpy::serializeGPUBuffers(StateSerializer *serializer) {
                          this](
                             const std::vector<VkBufferImageCopy> &copies,
                             gapil::Ref<QueueObject> queue) {
+    const uint32_t queue_family = queue->mFamily;
     StagingCommandBuffer commandBuffer(device_functions, img->mDevice,
-                                       queue->mFamily);
+                                       queue_family);
 
     std::vector<VkImageMemoryBarrier> img_barriers;
     std::vector<uint32_t> old_layouts;
     walkImageSubRng(
         img, img_whole_rng,
-        [&img, &img_barriers, &old_layouts](
+        [&img, &img_barriers, &old_layouts, queue_family](
            uint32_t aspect_bit, uint32_t layer, uint32_t level) {
          auto &img_level =
              img->mAspects[aspect_bit]->mLayers[layer]->mLevels[level];
-          img_barriers.push_back(VkImageMemoryBarrier{
-              VkStructureType::VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-              nullptr,
-              (VkAccessFlagBits::VK_ACCESS_MEMORY_WRITE_BIT << 1) - 1,
-              VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT,
-              img_level->mLayout,
-              VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-              kQueueFamilyIgnore,
-              kQueueFamilyIgnore,
-              img->mVulkanHandle,
-              {VkImageAspectFlags(aspect_bit), level, 1, layer, 1},
-          });
-          old_layouts.push_back(img_level->mLayout);
+          if (img_level->mLastBoundQueue != nullptr &&
+              img_level->mLastBoundQueue->mFamily == queue_family) {
+            img_barriers.push_back(VkImageMemoryBarrier{
+                VkStructureType::VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                nullptr,
+                (VkAccessFlagBits::VK_ACCESS_MEMORY_WRITE_BIT << 1) - 1,
+                VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT,
+                img_level->mLayout,
+                VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                kQueueFamilyIgnore,
+                kQueueFamilyIgnore,
+                img->mVulkanHandle,
+                {VkImageAspectFlags(aspect_bit), level, 1, layer, 1},
+            });
+            old_layouts.push_back(img_level->mLayout);
+          }
        });
     device_functions.vkCmdPipelineBarrier(
         commandBuffer.GetBuffer(),
@@ -838,19 +884,10 @@ void VulkanSpy::serializeGPUBuffers(StateSerializer *serializer) {
                 : copies_in_order[i + 1].mbufferOffset;
         const uint32_t aspect_bit
             = (uint32_t)copy.mimageSubresource.maspectMask;
+        const uint32_t mip_level = copy.mimageSubresource.mmipLevel;
+        const uint32_t array_layer = copy.mimageSubresource.mbaseArrayLayer;
         byte_size_and_extent e =
             level_size(copy.mimageExtent, image_info.mFormat, 0, aspect_bit);
-        auto bp = block_pitch(copy.mimageExtent, image_info.mFormat,
-                              copy.mimageSubresource.mmipLevel, aspect_bit);
-
-        if ((copy.mimageOffset.mx % bp.texel_width != 0) ||
-            (copy.mimageOffset.my % bp.texel_height != 0)) {
-          // We cannot place partial blocks
-          return;
-        }
-        uint32_t x = (copy.mimageOffset.mx / bp.texel_width) * bp.element_size;
-        uint32_t y = (copy.mimageOffset.my / bp.texel_height) * bp.height_pitch;
-        uint32_t z = copy.mimageOffset.mz * bp.depth_pitch;
 
         if ((image_info.mFormat == VkFormat::VK_FORMAT_X8_D24_UNORM_PACK32 ||
              image_info.mFormat == VkFormat::VK_FORMAT_D24_UNORM_S8_UINT) &&
@@ -881,16 +918,56 @@ void VulkanSpy::serializeGPUBuffers(StateSerializer *serializer) {
           }
         }
 
-        memory::Observation observation;
-        const uint32_t mip_level = copy.mimageSubresource.mmipLevel;
-        const uint32_t array_layer = copy.mimageSubresource.mbaseArrayLayer;
-        observation.set_base(x + y + z);
-        observation.set_pool(img->mAspects[aspect_bit]
-                                 ->mLayers[array_layer]
-                                 ->mLevels[mip_level]
-                                 ->mData.pool_id());
-        serializer->sendData(&observation, true, pData + new_offset,
-                             e.level_size);
+        auto bp = level_pitch(img, aspect_bit, array_layer, mip_level);
+        if ((copy.mimageOffset.mx % bp.texel_width != 0) ||
+            (copy.mimageOffset.my % bp.texel_height != 0)) {
+          // We cannot place partial blocks
+          return;
+        }
+        auto &img_level =
+            img->mAspects[aspect_bit]->mLayers[array_layer]->mLevels[mip_level];
+        // If the image has a linear layout and its row pitch and depth pitch
+        // are larger than the pitches of a tightly packed image, we need to
+        // set the observation row by row. Otherwise, we can use just one
+        // observation for the extent of this copy.
+        if (bp.linear_layout_depth_pitch <= bp.depth_pitch &&
+            bp.linear_layout_row_pitch <= bp.row_pitch) {
+          uint32_t x =
+              (copy.mimageOffset.mx / bp.texel_width) * bp.element_size;
+          uint32_t y = (copy.mimageOffset.my / bp.texel_height) * bp.row_pitch;
+          uint32_t z = copy.mimageOffset.mz * bp.depth_pitch;
+          memory::Observation observation;
+          observation.set_base(x + y + z);
+          observation.set_pool(img_level->mData.pool_id());
+          serializer->sendData(&observation, true, pData + new_offset,
+                               e.level_size);
+
+        } else {
+          // Need to set the base row by row for linear-layout images whose
+          // row pitch and depth pitch are larger than the tightly packed
+          // pitches.
+          pitch ep =
+              extent_pitch(copy.mimageExtent, img->mInfo.mFormat, aspect_bit);
+          for (uint32_t zd = 0; zd < copy.mimageExtent.mDepth; zd++) {
+            for (uint32_t yd = 0;
+                 yd < subRoundUpTo(nullptr, nullptr, copy.mimageExtent.mHeight,
+                                   bp.texel_height);
+                 yd++) {
+              uint32_t x =
+                  (copy.mimageOffset.mx / bp.texel_width) * bp.element_size;
+              uint32_t y = ((copy.mimageOffset.my / bp.texel_height) + yd) *
+                           bp.linear_layout_row_pitch;
+              uint32_t z =
+                  (copy.mimageOffset.mz + zd) * bp.linear_layout_depth_pitch;
+              uint32_t mem_row_offset =
+                  zd * ep.depth_pitch + yd * ep.row_pitch + new_offset;
+              memory::Observation observation;
+              observation.set_base(x + y + z);
+              observation.set_pool(img_level->mData.pool_id());
+              serializer->sendData(&observation, true, pData + mem_row_offset,
+                                   ep.row_pitch);
+            }
+          }
+        }
         new_offset = next_offset;
       }
     }
diff --git a/gapis/api/vulkan/api/image.api b/gapis/api/vulkan/api/image.api
index 57f7327495..7f7d31e9fb 100644
--- a/gapis/api/vulkan/api/image.api
+++ b/gapis/api/vulkan/api/image.api
@@ -352,23 +352,25 @@ cmd VkResult vkBindImageMemory(
               // stencil element is always 1 byte wide
               as!u32(1)
             }
-            size := widthInBlocks * heightInBlocks * level.Depth * elementSize
+            tightlyPackedSize := widthInBlocks * heightInBlocks * level.Depth * elementSize
 
             // If the image has LINEAR tiling and the image level has layout
-            // PREINITIALIZED, link the data back to the bound device memory.
-            // Otherwise creates its own shadow memory pool.
+            // PREINITIALIZED and a size larger than our calculated tightly
+            // packed size, link the data back to the bound device memory.
+            // Otherwise create its own shadow memory pool.
            // TODO: If the image as a whole requires more memory than we
            // calculated, we should link the data back to the bound device memory
            // no matter whether the tiling is LINEAR or OPTIMAL. But we need to
-            // come up with a 'linear layout' used in GAPID.
+            // come up with a 'linear layout' in GAPID.
            if (imageObject.Info.Tiling == VK_IMAGE_TILING_LINEAR) &&
                (level.Layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
-               (level.LinearLayout != null) {
+               (level.LinearLayout != null) &&
+               (as!u64(level.LinearLayout.size) > as!u64(tightlyPackedSize)) {
              loffset := as!u64(memoryOffset + level.LinearLayout.offset)
              lsize := as!u64(level.LinearLayout.size)
              level.Data = imageObject.BoundMemory.Data[loffset:loffset + lsize]
            } else {
-              level.Data = make!u8(size)
+              level.Data = make!u8(tightlyPackedSize)
            }
          }
        }
diff --git a/gapis/api/vulkan/resources.go b/gapis/api/vulkan/resources.go
index a4b6e3dabe..6c04e680ee 100644
--- a/gapis/api/vulkan/resources.go
+++ b/gapis/api/vulkan/resources.go
@@ -523,10 +523,12 @@ func setCubemapFace(img *image.Info, cubeMap *api.CubemapLevel, layerIndex uint3
 	return true
 }
 
-func (t ImageObjectʳ) imageInfo(ctx context.Context, s *api.GlobalState, format *image.Format, layer, level uint32) *image.Info {
+func (t ImageObjectʳ) imageInfo(ctx context.Context, s *api.GlobalState, vkFmt VkFormat, layer, level uint32) *image.Info {
 	if t.Info().ArrayLayers() <= layer || t.Info().MipLevels() <= level {
 		return nil
 	}
+	format, _ := getImageFormatFromVulkanFormat(vkFmt)
+
 	switch VkImageAspectFlagBits(t.ImageAspect()) {
 	case VkImageAspectFlagBits_VK_IMAGE_ASPECT_COLOR_BIT,
 		VkImageAspectFlagBits_VK_IMAGE_ASPECT_DEPTH_BIT,
@@ -535,13 +537,46 @@ func (t ImageObjectʳ) imageInfo(ctx context.Context, s *api.GlobalState, format
 		if l.Data().Size() == 0 {
 			return nil
 		}
-		return &image.Info{
+		ll := l.LinearLayout()
+		expectedSize := format.Size(int(l.Width()), int(l.Height()), int(l.Depth()))
+		if ll.IsNil() || ll.Size() == VkDeviceSize(expectedSize) {
+			return &image.Info{
+				Format: format,
+				Width:  l.Width(),
+				Height: l.Height(),
+				Depth:  l.Depth(),
+				Bytes:  image.NewID(l.Data().ResourceID(ctx, s)),
+			}
+		}
+		elementAndTexelBlockSize, err := subGetElementAndTexelBlockSize(ctx, nil, api.CmdNoID, nil, s, nil, 0, nil, vkFmt)
+		if err != nil {
+			log.Errf(ctx, err, "[Trim linear image data for image: %v]", t.VulkanHandle())
+			return nil
+		}
+		texelHeight := elementAndTexelBlockSize.TexelBlockSize().Height()
+		heightInBlocks, _ := subRoundUpTo(ctx, nil, api.CmdNoID, nil, s, nil, 0, nil, l.Height(), texelHeight)
+		colorData := make([]uint8, 0, expectedSize)
+		colorRawSize := uint64(format.Size(int(l.Width()), 1, 1))
+		levelData := l.Data().MustRead(ctx, nil, s, nil)
+		for z := uint64(0); z < uint64(l.Depth()); z++ {
+			for y := uint64(0); y < uint64(heightInBlocks); y++ {
+				offset := z*uint64(ll.DepthPitch()) + y*uint64(ll.RowPitch())
+				colorData = append(colorData, levelData[offset:offset+colorRawSize]...)
+			}
+		}
+		imgData := &image.Data{
 			Format: format,
 			Width:  l.Width(),
 			Height: l.Height(),
 			Depth:  l.Depth(),
-			Bytes:  image.NewID(l.Data().ResourceID(ctx, s)),
+			Bytes:  colorData[:],
 		}
+		info, err := imgData.NewInfo(ctx)
+		if err != nil {
+			log.Errf(ctx, err, "[Trim linear image data for image: %v]", t.VulkanHandle())
+			return nil
+		}
+		return info
 	case VkImageAspectFlagBits_VK_IMAGE_ASPECT_DEPTH_BIT | VkImageAspectFlagBits_VK_IMAGE_ASPECT_STENCIL_BIT:
 		depthLevel := t.Aspects().Get(VkImageAspectFlagBits_VK_IMAGE_ASPECT_DEPTH_BIT).Layers().Get(layer).Levels().Get(level)
@@ -601,7 +636,7 @@ func (t ImageObjectʳ) imageInfo(ctx context.Context, s *api.GlobalState, format
 func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*api.ResourceData, error) {
 	ctx = log.Enter(ctx, "ImageObject.ResourceData()")
 	vkFmt := t.Info().Fmt()
-	format, err := getImageFormatFromVulkanFormat(vkFmt)
+	_, err := getImageFormatFromVulkanFormat(vkFmt)
 	if err != nil {
 		return nil, &service.ErrDataUnavailable{Reason: messages.ErrNoTextureData(t.ResourceHandle())}
 	}
@@ -617,7 +652,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 	}
 	for layer := uint32(0); layer < t.Info().ArrayLayers(); layer++ {
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, layer, level)
+			info := t.imageInfo(ctx, s, vkFmt, layer, level)
 			if info == nil {
 				continue
 			}
@@ -636,7 +671,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 	for layer := uint32(0); layer < t.Info().ArrayLayers(); layer++ {
 		levels := make([]*image.Info, t.Info().MipLevels())
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, layer, level)
+			info := t.imageInfo(ctx, s, vkFmt, layer, level)
 			if info == nil {
 				continue
 			}
@@ -650,7 +685,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 		// Single layer 2D texture
 		levels := make([]*image.Info, t.Info().MipLevels())
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, 0, level)
+			info := t.imageInfo(ctx, s, vkFmt, 0, level)
 			if info == nil {
 				continue
 			}
@@ -662,7 +697,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 		// 3D images can have only one layer
 		levels := make([]*image.Info, t.Info().MipLevels())
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, 0, level)
+			info := t.imageInfo(ctx, s, vkFmt, 0, level)
 			if info == nil {
 				continue
 			}
@@ -677,7 +712,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 	for layer := uint32(0); layer < t.Info().ArrayLayers(); layer++ {
 		levels := make([]*image.Info, t.Info().MipLevels())
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, layer, level)
+			info := t.imageInfo(ctx, s, vkFmt, layer, level)
 			if info == nil {
 				continue
 			}
@@ -690,7 +725,7 @@ func (t ImageObjectʳ) ResourceData(ctx context.Context, s *api.GlobalState) (*a
 		// Single layer 1D texture
 		levels := make([]*image.Info, t.Info().MipLevels())
 		for level := uint32(0); level < t.Info().MipLevels(); level++ {
-			info := t.imageInfo(ctx, s, format, 0, level)
+			info := t.imageInfo(ctx, s, vkFmt, 0, level)
 			if info == nil {
 				continue
 			}
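The resources.go change above repacks a linearly laid out level, whose row and depth pitches may be larger than the tightly packed pitches, into tightly packed bytes before building the image info. A minimal standalone sketch of that repacking loop, using hypothetical plain-Go types rather than GAPID's generated API, could look like this:

package main

import "fmt"

// linearLayout mirrors VkSubresourceLayout's pitches (a hypothetical struct
// for illustration, not the generated GAPID type).
type linearLayout struct {
	rowPitch   uint64 // bytes between successive rows in the source data
	depthPitch uint64 // bytes between successive depth slices
}

// repackTight copies a linearly laid out level into a tightly packed buffer.
// heightInBlocks is the level height divided by the texel block height
// (rounded up); tightRowSize is the byte size of one tightly packed row.
func repackTight(src []byte, ll linearLayout, depth, heightInBlocks int, tightRowSize uint64) []byte {
	out := make([]byte, 0, uint64(depth)*uint64(heightInBlocks)*tightRowSize)
	for z := 0; z < depth; z++ {
		for y := 0; y < heightInBlocks; y++ {
			// Read each row at its pitched offset, keep only the packed bytes.
			offset := uint64(z)*ll.depthPitch + uint64(y)*ll.rowPitch
			out = append(out, src[offset:offset+tightRowSize]...)
		}
	}
	return out
}

func main() {
	// 4x2 single-slice image, 4 bytes per texel, padded to a 32-byte row pitch.
	src := make([]byte, 2*32)
	for i := range src {
		src[i] = byte(i)
	}
	tight := repackTight(src, linearLayout{rowPitch: 32, depthPitch: 64}, 1, 2, 16)
	fmt.Println(len(tight)) // 32 bytes: two tightly packed 16-byte rows
}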