I am trying to develop a multi-stream H.264 video player based on the cudaDecodeD3D9 example from the NVIDIA GPU Computing SDK 4.2.
The application works correctly with a few streams, but the assertion after cuvidCreateDecoder fails with CUDA_ERROR_OUT_OF_MEMORY when I open 12 streams at a resolution of 800x600 or 9 streams at 1920x1080. At that point cudaMemGetInfo reports 387 MB of available memory (on a video card with 1 GB) and 1.3 GB (on a video card with 2 GB). Does memory fragmentation cause this? How can I use the available memory?
/**
 * Construct a video decoder for the stream described by rVideoFormat.
 *
 * Stores the context, creation flags and context lock on the object, fills
 * the decoder-create-info member from the video format, and then creates the
 * decoder with cuvidCreateDecoder().
 *
 * @param rVideoFormat  Stream format; its codec, coded_width, coded_height
 *                      and chroma_format fields are read.
 * @param rContext      CUDA context; a copy is kept in m_Context.
 * @param eCreateFlags  Decoder creation flags, forwarded as ulCreationFlags.
 * @param vidCtxLock    Video context lock, forwarded as vidLock.
 */
VideoDecoder::VideoDecoder(const CUVIDEOFORMAT & rVideoFormat,
                           CUcontext &rContext,
                           cudaVideoCreateFlags eCreateFlags,
                           CUvideoctxlock &vidCtxLock)
    : m_VidCtxLock(vidCtxLock)
{
    // Keep copies of the caller's context and creation flags.
    m_Context          = rContext;
    m_VideoCreateFlags = eCreateFlags;

    // Work on the member struct through a short local alias; start from an
    // all-zero struct so every field not set below is zero.
    CUVIDDECODECREATEINFO &rCreateInfo = oVideoDecodeCreateInfo_;
    memset(&rCreateInfo, 0, sizeof(CUVIDDECODECREATEINFO));

    // Stream description, taken straight from the video format.
    rCreateInfo.CodecType    = rVideoFormat.codec;
    rCreateInfo.ChromaFormat = rVideoFormat.chroma_format;
    rCreateInfo.ulWidth      = rVideoFormat.coded_width;
    rCreateInfo.ulHeight     = rVideoFormat.coded_height;

    // Output: NV12 surfaces, adaptive deinterlacing, target size equal to
    // the coded size (i.e. no scaling).
    rCreateInfo.OutputFormat    = cudaVideoSurfaceFormat_NV12;
    rCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    rCreateInfo.ulTargetWidth   = rCreateInfo.ulWidth;
    rCreateInfo.ulTargetHeight  = rCreateInfo.ulHeight;

    // Start from the frame queue's maximum size and drop one decode surface
    // at a time until surfaces * width * height is at most 16M pixels
    // (per the original note: 16M pixels at 4:2:0 = 24M bytes).
    for (rCreateInfo.ulNumDecodeSurfaces = FrameQueue::cnMaximumSize;
         rCreateInfo.ulNumDecodeSurfaces * rVideoFormat.coded_width * rVideoFormat.coded_height > 16*1024*1024;
         rCreateInfo.ulNumDecodeSurfaces--)
    {
        // Nothing to do: the loop header performs the reduction.
    }

    // Per the original note, no more than 8 surfaces are mapped at once;
    // MAX_FRAME_COUNT itself is defined outside this block.
    rCreateInfo.ulNumOutputSurfaces = MAX_FRAME_COUNT;
    rCreateInfo.ulCreationFlags     = m_VideoCreateFlags;
    rCreateInfo.vidLock             = m_VidCtxLock;

    // Query free/total device memory just before creating the decoder.
    // NOTE(review): neither the return code nor the two values are used
    // afterwards; this looks like a debugging aid.
    size_t nFreeBytes, nTotalBytes;
    cudaMemGetInfo(&nFreeBytes, &nTotalBytes);

    // Create the decoder.
    // NOTE(review): the result is only checked by assert(), which is compiled
    // out when NDEBUG is defined.
    CUresult oResult = cuvidCreateDecoder(&oDecoder_, &rCreateInfo);
    assert(oResult == CUDA_SUCCESS);
}
Can cuvidCreateDecoder work with resolutions above 1920x1080? When I try a 2560x1920 stream, the assertion after cuvidCreateDecoder fails with CUDA_ERROR_INVALID_SOURCE.
For the memory issue, see this answer.
For the question on resolution: GPUs of Compute Capability 2.0 and earlier do not support resolutions larger than HD for cudaDecodeD3D9. This is why you can't decode a 2560x1920 stream.
Kepler GPUs can support much larger resolutions.