Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Encoder produces black frame the second time running in the same process #509

Open
david-molnar-oculai opened this issue Aug 2, 2023 · 0 comments

Comments

@david-molnar-oculai
Copy link

david-molnar-oculai commented Aug 2, 2023

Describe the bug
I use the code below to decode and re-encode a video file. The video runs at 1 fps and contains 4 frames. The first encoding works as expected, but when the processing is called again in the same process on the same file (or any other), the first frame of the produced video is black (the rest are fine). As far as I understand, the problem is caused by converting the surface to a tensor and back; without that conversion the problem doesn't occur.

To Reproduce
See the provided code (python3 code.py). Sample mkv: https://1drv.ms/u/s!At82OVPhN7VajIhfttp06Xb4lx-gVw?e=4bpERG
Store the sample mkv in the same folder as code.py and name it sample.mkv. The output will be written to the same folder as output.mkv.

Expected behavior
The first frame of the video is encoded correctly.

Desktop (please complete the following information):

  • OS: Linux (AWS g4dn instance with Tesla T4, 16GB RAM and 4 vCPUs)
  • Nvidia driver version: 535.54.03
  • CUDA Version: 12.2
  • Python Version: 3.8
import torch
import subprocess
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc


class cconverter:
    """
    Chain of surface converters that are applied one after another.

    Every converter in the chain is created for the same frame size and GPU.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        # The chain starts out empty; converters are appended through add().
        self.chain = []
        self.w, self.h = width, height
        self.gpu_id = gpu_id

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """
        Append a converter from src_fmt to dst_fmt to the end of the chain.
        """
        step = nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id)
        self.chain.append(step)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Pass src_surface through every converter in insertion order.

        Returns a clone of the last produced surface. Raises RuntimeError as
        soon as any step yields an empty surface.
        """
        context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG
        )

        current = src_surface
        for step in self.chain:
            current = step.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")

        # NOTE(review): presumably cloned so the caller does not hold a
        # surface owned by the last converter - confirm against PyNvCodec.
        return current.Clone(self.gpu_id)


def surface_to_tensor(surface: nvc.Surface) -> torch.Tensor:
    """
    Export a planar rgb surface as a cuda float tensor scaled by 1/255.

    The result has shape (3, plane_height / 3, plane_width) and is clamped
    to [0.0, 1.0]. Raises RuntimeError when the surface is not RGB_PLANAR
    or when the export yields None.
    """
    if surface.Format() != nvc.PixelFormat.RGB_PLANAR:
        raise RuntimeError("Surface shall be of RGB_PLANAR pixel format")

    plane = surface.PlanePtr()
    raw = pnvc.DptrToTensor(
        plane.GpuMem(), plane.Width(), plane.Height(), plane.Pitch(), plane.ElemSize()
    )
    if raw is None:
        raise RuntimeError("Can not export to tensor.")

    # Split the plane rows into three equally tall channel slices.
    raw.resize_(3, int(plane.Height() / 3), plane.Width())

    as_float = raw.type(dtype=torch.cuda.FloatTensor)
    return torch.clamp(torch.divide(as_float, 255.0), 0.0, 1.0)


def tensor_to_surface(img_tensor: torch.Tensor, gpu_id: int) -> "nvc.Surface":
    """
    Converts cuda float tensor to planar rgb surface.

    Args:
        img_tensor: Tensor of shape (3, height, width). Values are clamped
            to [0.0, 1.0] and multiplied by 255 before the byte conversion.
        gpu_id: GPU on which the destination surface is made.

    Returns:
        A newly made RGB_PLANAR surface of size width x height, filled via
        pnvc.TensorToDptr.

    Raises:
        RuntimeError: If the tensor is not three-dimensional or its first
            dimension is not 3.
    """
    # Either condition alone makes the tensor unusable, so they are joined
    # with "or". Testing the rank first also guarantees shape[1] and
    # shape[2] exist before they are read below.
    if len(img_tensor.shape) != 3 or img_tensor.shape[0] != 3:
        raise RuntimeError("Shape of the tensor must be (3, height, width)")

    tensor_w, tensor_h = img_tensor.shape[2], img_tensor.shape[1]
    img = torch.clamp(img_tensor, 0.0, 1.0)
    img = torch.multiply(img, 255.0)
    img = img.type(dtype=torch.cuda.ByteTensor)

    surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
    surf_plane = surface.PlanePtr()
    # NOTE(review): it is not visible here whether this copy is ordered with
    # later GPU work on the surface - confirm if stale or black frames appear.
    pnvc.TensorToDptr(
        img,
        surf_plane.GpuMem(),
        surf_plane.Width(),
        surf_plane.Height(),
        surf_plane.Pitch(),
        surf_plane.ElemSize(),
    )

    return surface


def _reencode_surface(
    src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id
) -> bool:
    """
    Round-trip one decoded surface through a tensor and feed it to the encoder.

    Args:
        src_surface: Non-empty surface produced by the decoder.
        to_rgb: Conversion chain applied before the tensor export.
        to_nv12: Conversion chain applied after the tensor import.
        nvEnc: Encoder that receives the converted surface.
        encFrame: Buffer the encoder fills with the encoded packet.
        dstFile: Binary file object the encoded packet is written to.
        gpu_id: GPU passed to tensor_to_surface.

    Returns:
        False when a conversion chain returned an empty surface (the caller
        stops its loop), True otherwise.
    """
    # Convert to planar RGB
    rgb_pln = to_rgb.run(src_surface)
    if rgb_pln.Empty():
        return False

    src_tensor = surface_to_tensor(rgb_pln)
    dst_tensor = src_tensor
    surface_rgb = tensor_to_surface(dst_tensor, gpu_id)

    # Convert back to NV12. The surface checked here must be the conversion
    # result, not the decoder output that was already known to be non-empty.
    dst_surface = to_nv12.run(surface_rgb)
    if dst_surface.Empty():
        return False

    # Encode; a packet is written only when the encoder reports success.
    if nvEnc.EncodeSingleSurface(dst_surface, encFrame):
        dstFile.write(bytearray(encFrame))
    return True


def process():
    """
    Decode sample.mkv, round-trip every frame through a tensor and re-encode.

    The encoded H.264 stream is written to output.h264, which ffmpeg then
    copies into output.mkv. Frame size (2560x1920), GPU 0 and the file names
    are fixed inside the function.
    """
    w = 2560
    h = 1920
    gpu_id = 0

    to_rgb = cconverter(w, h, gpu_id)
    to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
    to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)
    to_rgb.add(nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR)

    to_nv12 = cconverter(w, h, gpu_id)
    to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
    to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
    to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

    encFrame = np.ndarray(shape=(0), dtype=np.uint8)
    nvEnc = nvc.PyNvEncoder(
        {
            "preset": "default",
            "codec": "h264",
            "s": f"{w}x{h}",
            "bitrate": "5M",
            "fps": "1",
        },
        gpu_id,
    )

    nvDec = nvc.PyNvDecoder(w, h, nvc.PixelFormat.NV12, nvc.CudaVideoCodec.H264, gpu_id)

    packet = np.ndarray(shape=(0), dtype=np.uint8)
    pdata_in, pdata_out = nvc.PacketData(), nvc.PacketData()

    nvDmx = nvc.PyFFmpegDemuxer("sample.mkv")

    # The context manager closes the file even if a conversion step raises.
    with open("output.h264", "wb") as dstFile:
        # Feed demuxed packets to the decoder until the demuxer runs dry.
        while nvDmx.DemuxSinglePacket(packet):
            # Get last packet data to obtain frame timestamp
            nvDmx.LastPacketData(pdata_in)

            src_surface = nvDec.DecodeSurfaceFromPacket(pdata_in, packet, pdata_out)
            if src_surface.Empty():
                # No surface came back for this packet; keep feeding packets.
                continue
            if not _reencode_surface(
                src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id
            ):
                break

        # Drain surfaces the decoder still returns after the last packet.
        while True:
            src_surface = nvDec.FlushSingleSurface()
            if src_surface.Empty():
                break
            if not _reencode_surface(
                src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id
            ):
                break

        # Drain packets the encoder still returns.
        while nvEnc.FlushSinglePacket(encFrame):
            dstFile.write(bytearray(encFrame))

    # Runs only after the raw stream is closed, so ffmpeg reads complete data.
    subprocess.run(
        [
            "ffmpeg",
            "-hide_banner",
            "-r",
            "1",
            "-i",
            "output.h264",
            "-c",
            "copy",
            "-y",
            "output.mkv",
        ]
    )


# Run the whole pipeline twice within the same interpreter session.
for _ in range(2):
    process()
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant