Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Commit

Permalink
Refactoring PyNvCodec module
Browse files Browse the repository at this point in the history
  • Loading branch information
rarzumanyan committed Oct 8, 2021
1 parent 5742166 commit e19e85a
Show file tree
Hide file tree
Showing 15 changed files with 2,806 additions and 2,539 deletions.
10 changes: 10 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# We'll use defaults from the LLVM style, with 2 columns indentation.
BasedOnStyle: LLVM
IndentWidth: 2
BreakBeforeBraces: Linux
---
Language: Cpp
# Force pointers to the type for C++.
DerivePointerAlignment: false
PointerAlignment: Left
18 changes: 9 additions & 9 deletions PyNvCodec/TC/src/MemoryInterfaces.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright 2019 NVIDIA Corporation
* Copyright 2021 Videonetics Technology Private Limited
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand Down Expand Up @@ -79,12 +79,12 @@ struct AllocRegister {
}
};

AllocRegister BuffersRegister, HWSurfaceRegister, CudaBuffersRegiser;
AllocRegister BuffersRegister, HWSurfaceRegister, CudaBuffersRegister;

bool CheckAllocationCounters() {
auto numLeakedBuffers = BuffersRegister.GetSize();
auto numLeakedSurfaces = HWSurfaceRegister.GetSize();
auto numLeakedCudaBuffers = CudaBuffersRegiser.GetSize();
auto numLeakedCudaBuffers = CudaBuffersRegister.GetSize();

if (numLeakedBuffers) {
cerr << "Leaked buffers (id : size): " << endl;
Expand All @@ -105,10 +105,10 @@ bool CheckAllocationCounters() {
if (numLeakedCudaBuffers) {
cerr << "Leaked CUDA buffers (id : size): " << endl;
for (auto i = 0; i < numLeakedCudaBuffers; i++) {
auto pNote = CudaBuffersRegiser.GetNoteByIndex(i);
auto pNote = CudaBuffersRegister.GetNoteByIndex(i);
cerr << "\t" << pNote->id << "\t: " << pNote->size << endl;
}
}
}

return (0U == numLeakedBuffers) && (0U == numLeakedSurfaces) && (0U == numLeakedCudaBuffers);
}
Expand Down Expand Up @@ -309,7 +309,7 @@ bool CudaBuffer::Allocate() {

if (0U != gpuMem) {
#ifdef TRACK_TOKEN_ALLOCATIONS
id = CudaBuffersRegiser.AddNote(GetRawMemSize());
id = CudaBuffersRegister.AddNote(GetRawMemSize());
#endif
return true;
}
Expand All @@ -323,7 +323,7 @@ void CudaBuffer::Deallocate() {

#ifdef TRACK_TOKEN_ALLOCATIONS
AllocInfo info(id, GetRawMemSize());
CudaBuffersRegiser.DeleteNote(info);
CudaBuffersRegister.DeleteNote(info);
#endif
}

Expand Down Expand Up @@ -444,8 +444,8 @@ void SurfacePlane::Export(CUdeviceptr dst, uint32_t dst_pitch, CUcontext ctx,
}

SurfacePlane::SurfacePlane(uint32_t newWidth, uint32_t newHeight,
uint32_t newElemSize, uint32_t srcPitch,
CUdeviceptr src, CUcontext context, CUstream str)
uint32_t newElemSize, uint32_t srcPitch,
CUdeviceptr src, CUcontext context, CUstream str)
: SurfacePlane(newWidth, newHeight, newElemSize, context) {
Import(src, srcPitch, context, str);
}
Expand Down
173 changes: 30 additions & 143 deletions PyNvCodec/inc/PyNvCodec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ class CuvidParserException : public std::runtime_error {
CuvidParserException() : std::runtime_error("HW reset") {}
};

// Manager of CUDA contexts and streams.
//
// The constructor is private, so the only visible way to obtain an object is
// the static Instance() accessor.
// NOTE(review): copy construction / assignment are not deleted here - confirm
// that copying the returned object is never intended.
class CudaResMgr
{
  CudaResMgr();

public:
  // Access to the manager object and to the GPU count.
  static CudaResMgr& Instance();
  static size_t GetNumGpus();

  ~CudaResMgr();

  // Look up the context / stream for index idx
  // (presumably the GPU ordinal - confirm against the implementation).
  CUcontext GetCtx(size_t idx);
  CUstream GetStream(size_t idx);

  // (device, context) pairs and streams; publicly accessible.
  std::vector<std::pair<CUdevice, CUcontext>> g_Contexts;
  std::vector<CUstream> g_Streams;

  // Presumably guard instance creation, context access and stream access
  // respectively - TODO confirm in the implementation file.
  static std::mutex gInsMutex;
  static std::mutex gCtxMutex;
  static std::mutex gStrMutex;
};

class PyFrameUploader {
std::unique_ptr<CudaUploadFrame> uploader;
uint32_t surfaceWidth, surfaceHeight;
Expand Down Expand Up @@ -95,7 +115,7 @@ class PyBufferUploader {
CUstream str);

PyBufferUploader(uint32_t elemSize, uint32_t numElems,
size_t ctx, size_t str) :
size_t ctx, size_t str) :
PyBufferUploader(elemSize, numElems, (CUcontext)ctx, (CUstream)str) {}

std::shared_ptr<CudaBuffer> UploadSingleBuffer(py::array_t<uint8_t> &buffer);
Expand Down Expand Up @@ -136,7 +156,7 @@ class PyCudaBufferDownloader {
CUstream str);

PyCudaBufferDownloader(uint32_t elemSize, uint32_t numElems,
size_t ctx, size_t str) :
size_t ctx, size_t str) :
PyCudaBufferDownloader(elemSize, numElems, (CUcontext)ctx, (CUstream)str) {}

bool DownloadSingleCudaBuffer(std::shared_ptr<CudaBuffer> buffer,
Expand Down Expand Up @@ -275,11 +295,11 @@ class PyNvDecoder {
PyNvDecoder(pathToFile, (CUcontext)ctx, (CUstream)str, ffmpeg_options){}

static Buffer *getElementaryVideo(DemuxFrame *demuxer,
SeekContext &seek_ctx, bool needSEI);
SeekContext *seek_ctx, bool needSEI);

static Surface *getDecodedSurface(NvdecDecodeFrame *decoder,
DemuxFrame *demuxer,
SeekContext &seek_ctx, bool needSEI);
SeekContext *seek_ctx, bool needSEI);

uint32_t Width() const;

Expand All @@ -305,148 +325,15 @@ class PyNvDecoder {

Pixel_Format GetPixelFormat() const;

std::shared_ptr<Surface> DecodeSurfaceFromPacket(py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(py::array_t<uint8_t> &packet);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(PacketData &enc_packet_data,
py::array_t<uint8_t> &packet);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(py::array_t<uint8_t> &packet,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSurfaceFromPacket(PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSingleSurface(py::array_t<uint8_t> &sei);

std::shared_ptr<Surface> DecodeSingleSurface(py::array_t<uint8_t> &sei,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSingleSurface(py::array_t<uint8_t> &sei,
SeekContext &ctx);

std::shared_ptr<Surface> DecodeSingleSurface(py::array_t<uint8_t> &sei,
SeekContext &ctx,
PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSingleSurface();

std::shared_ptr<Surface> DecodeSingleSurface(PacketData &pkt_data);

std::shared_ptr<Surface> DecodeSingleSurface(SeekContext &ctx);

std::shared_ptr<Surface> DecodeSingleSurface(SeekContext &ctx,
PacketData &pkt_data);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &sei);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &sei,
PacketData &pkt_data);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &sei,
SeekContext &ctx);
bool DecodeSurface(class DecodeContext &ctx);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &sei,
SeekContext &ctx,
PacketData &pkt_data);
bool DecodeFrame(class DecodeContext &ctx, py::array_t<uint8_t>& frame);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
PacketData &pkt_data);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
SeekContext &ctx);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame,
SeekContext &ctx,
PacketData &pkt_data);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei,
PacketData &pkt_data);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
py::array_t<uint8_t> &sei,
PacketData &pkt_data);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &packet);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
PacketData &enc_packet_data,
py::array_t<uint8_t> &packet);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
py::array_t<uint8_t> &packet,
PacketData &pkt_data);

bool DecodeFrameFromPacket(py::array_t<uint8_t> &frame,
PacketData &enc_packet_data,
py::array_t<uint8_t> &packet,
PacketData &pkt_data);

bool FlushSingleFrame(py::array_t<uint8_t> &frame);

bool FlushSingleFrame(py::array_t<uint8_t> &frame, PacketData &pkt_data);

std::shared_ptr<Surface> FlushSingleSurface();

std::shared_ptr<Surface> FlushSingleSurface(PacketData &pkt_data);

private:
bool DecodeSurface(struct DecodeContext &ctx);

Surface *getDecodedSurfaceFromPacket(py::array_t<uint8_t> *pPacket,
PacketData *p_packet_data = nullptr,
Surface *getDecodedSurfaceFromPacket(const py::array_t<uint8_t> *pPacket,
const PacketData *p_packet_data = nullptr,
bool no_eos = false);
};

struct EncodeContext {
std::shared_ptr<Surface> rawSurface;
py::array_t<uint8_t> *pPacket;
const py::array_t<uint8_t> *pMessageSEI;
bool sync;
bool append;

EncodeContext(std::shared_ptr<Surface> spRawSurface,
py::array_t<uint8_t> *packet,
const py::array_t<uint8_t> *messageSEI, bool is_sync,
bool is_append)
: rawSurface(spRawSurface), pPacket(packet), pMessageSEI(messageSEI),
sync(is_sync), append(is_append) {}
void DownloaderLazyInit();
};

class PyNvEncoder {
Expand Down Expand Up @@ -523,5 +410,5 @@ class PyNvEncoder {
bool FlushSinglePacket(py::array_t<uint8_t> &packet);

private:
bool EncodeSingleSurface(EncodeContext &ctx);
bool EncodeSingleSurface(struct EncodeContext &ctx);
};
10 changes: 10 additions & 0 deletions PyNvCodec/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,15 @@

# Source files listed for PyNvCodec; PARENT_SCOPE makes the variable visible
# to the CMakeLists.txt that added this directory.
set(PYNVCODEC_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/PyNvCodec.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyFrameUploader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyBufferUploader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PySurfaceDownloader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyCudaBufferDownloader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PySurfaceConverter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PySurfaceResizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyFFMpegDecoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyFFMpegDemuxer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyNvDecoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PyNvEncoder.cpp
PARENT_SCOPE
)
74 changes: 74 additions & 0 deletions PyNvCodec/src/PyBufferUploader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright 2019 NVIDIA Corporation
* Copyright 2021 Kognia Sports Intelligence
* Copyright 2021 Videonetics Technology Private Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "PyNvCodec.hpp"

using namespace std;
using namespace VPF;
using namespace chrono;

namespace py = pybind11;

// Unqualified shorthand for the TaskExecStatus values, so they can be compared
// against task results in this file without the TaskExecStatus:: prefix.
constexpr auto TASK_EXEC_SUCCESS = TaskExecStatus::TASK_EXEC_SUCCESS;
constexpr auto TASK_EXEC_FAIL = TaskExecStatus::TASK_EXEC_FAIL;

// Construct an uploader for buffers of numElems elements, each elemSize
// bytes wide, using the stream and context that CudaResMgr reports for
// gpu_ID.
PyBufferUploader::PyBufferUploader(uint32_t elemSize, uint32_t numElems,
                                   uint32_t gpu_ID)
{
  num_elems = numElems;
  elem_size = elemSize;

  // Resolve the CUDA resources for this GPU through the resource manager.
  auto& resources = CudaResMgr::Instance();
  auto gpuStream = resources.GetStream(gpu_ID);
  auto gpuContext = resources.GetCtx(gpu_ID);

  auto pUploadTask =
      UploadBuffer::Make(gpuStream, gpuContext, elem_size, num_elems);
  uploader.reset(pUploadTask);
}

// Construct an uploader for buffers of numElems elements, each elemSize
// bytes wide, using a caller-supplied CUDA context and stream.
PyBufferUploader::PyBufferUploader(uint32_t elemSize, uint32_t numElems,
                                   CUcontext ctx, CUstream str)
{
  num_elems = numElems;
  elem_size = elemSize;

  // Note the argument order expected by Make(): stream first, then context.
  auto pUploadTask = UploadBuffer::Make(str, ctx, elem_size, num_elems);
  uploader.reset(pUploadTask);
}

// Upload the contents of a byte array to the GPU.
//
// Builds a temporary Buffer from the array's size and data pointer, passes it
// to the uploader task as input 0, executes the task and returns a clone of
// the task's output 0.
//
// frame  - array holding the bytes to upload.
// return - shared pointer owning a clone of the resulting CudaBuffer.
// throws - std::runtime_error if the task reports TASK_EXEC_FAIL or yields
//          no output.
shared_ptr<CudaBuffer>
PyBufferUploader::UploadSingleBuffer(py::array_t<uint8_t>& frame)
{
  // Own the temporary Buffer so it is released on every exit path, including
  // when SetInput() or Execute() propagate an exception.
  unique_ptr<Buffer> pRawBuf(Buffer::Make(frame.size(), frame.mutable_data()));
  uploader->SetInput(pRawBuf.get(), 0U);
  auto res = uploader->Execute();
  // The temporary is no longer needed once the task has run; release it
  // before inspecting the result.
  pRawBuf.reset();

  if (TASK_EXEC_FAIL == res)
    throw runtime_error("Error uploading frame to GPU");

  auto pCudaBuffer = (CudaBuffer*)uploader->GetOutput(0U);
  if (!pCudaBuffer)
    throw runtime_error("Error uploading frame to GPU");

  // Return a clone rather than the task's own output object.
  return shared_ptr<CudaBuffer>(pCudaBuffer->Clone());
}

// Register the PyBufferUploader class and its methods with the pybind11
// module m.
void Init_PyBufferUploader(py::module& m)
{
  py::class_<PyBufferUploader> uploaderClass(m, "PyBufferUploader");

  // Constructor taking element size, element count and GPU id.
  uploaderClass.def(py::init<uint32_t, uint32_t, uint32_t>());

  // Constructor taking element size, element count, and the CUDA context and
  // stream passed as integer values.
  uploaderClass.def(py::init<uint32_t, uint32_t, size_t, size_t>());

  // The call guard releases the GIL for the duration of the upload call.
  uploaderClass.def("UploadSingleBuffer",
                    &PyBufferUploader::UploadSingleBuffer,
                    py::return_value_policy::take_ownership,
                    py::call_guard<py::gil_scoped_release>());
}
Loading

0 comments on commit e19e85a

Please sign in to comment.