// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "host-common/MediaCudaVideoHelper.h"
#include "host-common/MediaCudaDriverHelper.h"
#include "host-common/MediaCudaUtils.h"
#include "host-common/YuvConverter.h"
#include "android/utils/debug.h"

extern "C" {
#define INIT_CUDA_GL 1
#include "host-common/dynlink_cuda.h"
#include "host-common/dynlink_cudaGL.h"
#include "host-common/dynlink_nvcuvid.h"
}
#define MEDIA_CUDA_DEBUG 0

#if MEDIA_CUDA_DEBUG
#define CUDA_DPRINT(fmt, ...)                                              \
    fprintf(stderr, "media-cuda-video-helper: %s:%d " fmt "\n", __func__,  \
            __LINE__, ##__VA_ARGS__);
#else
#define CUDA_DPRINT(fmt, ...)
#endif

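// NVDEC_API_CALL only logs a failing cuvid/CUDA call in debug builds; it does
// not propagate the error. Call sites that have to react to a failure (e.g.
// cuvidMapVideoFrame in HandlePictureDisplay) check the CUresult directly.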
#define NVDEC_API_CALL(cuvidAPI)                                      \
    do {                                                              \
        CUresult errorCode = cuvidAPI;                                \
        if (errorCode != CUDA_SUCCESS) {                              \
            CUDA_DPRINT("%s failed with error code %d\n", #cuvidAPI,  \
                        (int)errorCode);                              \
        }                                                             \
    } while (0)

namespace android {
namespace emulation {

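// Cleared the first time CUDA setup fails on this host so that later helper
// instances can fail fast in init() instead of retrying the whole setup.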
bool MediaCudaVideoHelper::s_isCudaDecoderGood = true;

using TextureFrame = MediaTexturePool::TextureFrame;
using FrameInfo = MediaSnapshotState::FrameInfo;
using ColorAspects = MediaSnapshotState::ColorAspects;

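// Typical call sequence (illustrative sketch, not a verbatim call site; the
// OutputTreatmentMode value depends on the caller, IGNORE_RESULT is the only
// one referenced in this file):
//
//     MediaCudaVideoHelper helper(oMode,
//                                 FrameStorageMode::USE_GPU_TEXTURE,
//                                 cudaVideoCodec_H264);
//     if (helper.init()) {
//         helper.decode(data, size, pts);  // one compressed frame per call
//         helper.flush();                  // drain frames still in the parser
//     }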
MediaCudaVideoHelper::MediaCudaVideoHelper(OutputTreatmentMode oMode,
                                           FrameStorageMode fMode,
                                           cudaVideoCodec cudaVideoCodecType)
    : mUseGpuTexture(fMode == FrameStorageMode::USE_GPU_TEXTURE),
      mCudaVideoCodecType(cudaVideoCodecType) {
    mIgnoreDecoderOutput = (oMode == OutputTreatmentMode::IGNORE_RESULT);
}

MediaCudaVideoHelper::~MediaCudaVideoHelper() {
    deInit();
}

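// Tears down the cuvid parser, the decoder and the context lock while the
// CUDA context is current, then destroys the CUDA context itself. The parser,
// decoder and context handles are nulled out, so calling this a second time
// is effectively a no-op.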
void MediaCudaVideoHelper::deInit() {
    CUDA_DPRINT("deInit() called");

    mSavedDecodedFrames.clear();
    if (mCudaContext != nullptr) {
        NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
        if (mCudaParser != nullptr) {
            NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
            mCudaParser = nullptr;
        }

        if (mCudaDecoder != nullptr) {
            NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
            mCudaDecoder = nullptr;
        }
        NVDEC_API_CALL(cuCtxPopCurrent(NULL));
        NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
    }

    if (mCudaContext != nullptr) {
        CUresult myres = cuCtxDestroy(mCudaContext);
        if (myres != CUDA_SUCCESS) {
            CUDA_DPRINT("Failed to destroy cuda context; error code %d",
                        (int)myres);
        }
        mCudaContext = nullptr;
    }
}

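// Sets up everything decode() needs: loads the CUDA/NVDEC driver entry
// points, creates a CUDA context on GPU 0 plus a context lock, and creates a
// cuvid parser wired to the three Handle*Proc callbacks. The decoder itself
// is created later, in HandleVideoSequence(), once the stream geometry is
// known. Returns false and clears s_isCudaDecoderGood if driver or context
// setup fails.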
bool MediaCudaVideoHelper::init() {
    if (!s_isCudaDecoderGood) {
        CUDA_DPRINT(
                "Already verified: cuda decoder does not work on this host");
        return false;
    }
    if (!MediaCudaDriverHelper::initCudaDrivers()) {
        CUDA_DPRINT("Failed to initCudaDrivers");
        mIsGood = false;
        mErrorCode = 1;
        s_isCudaDecoderGood = false;
        return false;
    }

    if (mCudaContext != nullptr) {
        deInit();
    }

    // CUDA device and context setup.
    const int gpuIndex = 0;
    const int cudaFlags = 0;
    CUdevice cudaDevice = 0;
    CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
    if (myres != CUDA_SUCCESS) {
        mIsGood = false;
        mErrorCode = 2;
        s_isCudaDecoderGood = false;
        CUDA_DPRINT("Failed to get cuda device, error code %d", (int)myres);
        return false;
    }

    char buf[1024];
    myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
    if (myres != CUDA_SUCCESS) {
        mIsGood = false;
        mErrorCode = 3;
        s_isCudaDecoderGood = false;
        CUDA_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
        return false;
    }

    CUDA_DPRINT("using gpu device %s", buf);

    myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
    if (myres != CUDA_SUCCESS) {
        mIsGood = false;
        s_isCudaDecoderGood = false;
        CUDA_DPRINT("Failed to create cuda context, error code %d", (int)myres);
        return false;
    }

    NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));

    CUVIDPARSERPARAMS videoParserParameters = {};
    // videoParserParameters.CodecType = (mType == MediaCodecType::VP8Codec) ?
    // cudaVideoCodec_VP8 : cudaVideoCodec_VP9;
    videoParserParameters.CodecType = mCudaVideoCodecType;

    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulMaxDisplayDelay = 1;
    videoParserParameters.pUserData = this;
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
    videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
    NVDEC_API_CALL(
            cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));

    CUDA_DPRINT("Successfully created cuda context %p", mCudaContext);
    dprint("successfully created cuda video decoder for %s, with gpu texture "
           "mode %s",
           mCudaVideoCodecType == cudaVideoCodec_H264
                   ? "H264"
                   : (mCudaVideoCodecType == cudaVideoCodec_VP8 ? "VP8"
                                                                : "VP9"),
           mUseGpuTexture ? "on" : "off");

    return true;
}

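// Feeds one compressed frame to the cuvid parser. A null or empty payload is
// treated as end-of-stream; otherwise the packet carries the caller's
// timestamp, which comes back unchanged in HandlePictureDisplay.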
void MediaCudaVideoHelper::decode(const uint8_t* frame,
                                  size_t szBytes,
                                  uint64_t inputPts) {
    CUDA_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);

    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = frame;
    packet.payload_size = szBytes;
    packet.flags = CUVID_PKT_TIMESTAMP;
    packet.timestamp = inputPts;
    if (!frame || szBytes == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    } else {
        ++mNumInputFrame;
    }
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
}

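// Pushes an end-of-stream packet so the parser delivers any pictures it is
// still buffering through HandlePictureDisplay.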
void MediaCudaVideoHelper::flush() {
    CUDA_DPRINT("started flushing");
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = NULL;
    packet.payload_size = 0;
    packet.flags |= CUVID_PKT_ENDOFSTREAM;
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
    CUDA_DPRINT("done flushing");
}

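// Parser callback, invoked when a (new) sequence header is parsed. Checks
// that this GPU can decode the codec, resolution and macroblock count,
// records the stream geometry and color description, then (re)creates the
// NVDEC decoder. The return value is the number of decode surfaces the
// parser should use.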
int MediaCudaVideoHelper::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
    int nDecodeSurface = 8;  // need 8 for 4K video

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    if (!decodecaps.bIsSupported) {
        mIsGood = false;
        mErrorCode = 4;
        CUDA_DPRINT("Codec not supported on this GPU.");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        CUDA_DPRINT("Resolution not supported on this GPU");
        mIsGood = false;
        mErrorCode = 5;
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
        decodecaps.nMaxMBCount) {
        CUDA_DPRINT("MBCount not supported on this GPU");
        mIsGood = false;
        mErrorCode = 6;
        return nDecodeSurface;
    }

    mLumaWidth =
            pVideoFormat->display_area.right - pVideoFormat->display_area.left;
    mLumaHeight =
            pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
    mChromaHeight = mLumaHeight * 0.5;  // NV12
    mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;

    if (mCudaVideoCodecType == cudaVideoCodec_H264) {
        if (pVideoFormat->video_signal_description.video_full_range_flag)
            mColorRange = 2;
        else
            mColorRange = 0;

        mColorPrimaries =
                pVideoFormat->video_signal_description.color_primaries;
        mColorTransfer =
                pVideoFormat->video_signal_description.transfer_characteristics;
        mColorSpace =
                pVideoFormat->video_signal_description.matrix_coefficients;
    }

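    // Fill in the decoder creation parameters from the parsed sequence
    // header; output is always NV12, and deinterlacing is only enabled for
    // non-progressive content.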
    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    CUDA_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode =
                cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded
    // by NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = mCtxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
        CUDA_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
        mOutputWidth = mLumaWidth;
        mOutputHeight = mLumaHeight;
        CUDA_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
        unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
        if (mOutBufferSize < newOutBufferSize) {
            mOutBufferSize = newOutBufferSize;
        }
    }

    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;

    mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    if (mCudaDecoder != nullptr) {
        NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
        mCudaDecoder = nullptr;
    }
    {
        size_t free, total;
        cuMemGetInfo(&free, &total);
        CUDA_DPRINT("free memory %g M, total %g M", free / 1048576.0,
                    total / 1048576.0);
    }
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    CUDA_DPRINT("successfully called. decoder %p", mCudaDecoder);
    return nDecodeSurface;
}

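// Parser callback: submits one picture's parameters to the NVDEC decoder.
// Decoding is asynchronous; the result arrives via HandlePictureDisplay.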
int MediaCudaVideoHelper::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
    NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
    CUDA_DPRINT("successfully called.");
    return 1;
}

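// Parser callback: a decoded picture is ready in display order. Maps the
// decoded surface and either copies it into a GPU texture pair (Y + UV) or
// into a host buffer that is converted from NV12 to planar YUV, then queues
// the frame together with its pts and color aspects in mSavedDecodedFrames.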
int MediaCudaVideoHelper::HandlePictureDisplay(CUVIDPARSERDISPINFO* pDispInfo) {
    if (mIgnoreDecoderOutput) {
        return 1;
    }
    constexpr int MAX_NUM_INPUT_WITHOUT_OUTPUT = 16;
    if (mNumOutputFrame == 0 && mNumInputFrame > MAX_NUM_INPUT_WITHOUT_OUTPUT) {
        // after more than 16 inputs, there is still no output,
        // probably corrupted stream, ignore everything from now on
        dprint("WARNING: %d frames decoded without any output, possibly bad "
               "input stream. Ignore output frames (they might be corrupted) "
               "from now on.",
               MAX_NUM_INPUT_WITHOUT_OUTPUT);
        return 0;
    }

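    // Map the parser's display info onto the post-processing parameters
    // consumed by cuvidMapVideoFrame (field order and deinterlacing hints).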
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field =
            pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = 0;
    uint64_t myOutputPts = pDispInfo->timestamp;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    CUresult errorCode =
            cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
                               &dpSrcFrame, &nSrcPitch,
                               &videoProcessingParameters);
    if (errorCode != CUDA_SUCCESS) {
        CUDA_DPRINT("failed to call cuvidMapVideoFrame with error code %d\n",
                    (int)errorCode);
        return 0;
    }

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
    std::vector<uint8_t> myFrame;
    TextureFrame texFrame;
    if (mUseGpuTexture && mTexturePool != nullptr) {
        media_cuda_utils_copy_context my_copy_context{
                .src_frame = dpSrcFrame,
                .src_pitch = nSrcPitch,
                .src_surface_height = mSurfaceHeight,
                .dest_width = mOutputWidth,
                .dest_height = mOutputHeight,
        };
        texFrame = mTexturePool->getTextureFrame(mOutputWidth, mOutputHeight);
        mTexturePool->saveDecodedFrameToTexture(
                texFrame, &my_copy_context,
                (void*)media_cuda_utils_nv12_updater);
    } else {
        myFrame.resize(newOutBufferSize);
        uint8_t* pDecodedFrame = &(myFrame[0]);

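        // NV12 layout on the mapped device surface: the luma plane comes
        // first, the interleaved UV plane starts srcPitch * surfaceHeight
        // bytes in. Copy both planes into the host buffer, then convert the
        // interleaved UV into planar U and V below.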
        CUDA_MEMCPY2D m = {0};
        m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        m.srcDevice = dpSrcFrame;
        m.srcPitch = nSrcPitch;
        m.dstMemoryType = CU_MEMORYTYPE_HOST;
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
        m.dstPitch = mOutputWidth * mBPP;
        m.WidthInBytes = mOutputWidth * mBPP;
        m.Height = mLumaHeight;
        CUDA_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
                    m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
                    (int)m.Height);

        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));

        m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
                                    m.srcPitch * mSurfaceHeight);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
                                                m.dstPitch * mLumaHeight);
        m.Height = mChromaHeight;
        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
        YuvConverter<uint8_t> convert8(mOutputWidth, mOutputHeight);
        convert8.UVInterleavedToPlanar(pDecodedFrame);
    }

    NVDEC_API_CALL(cuStreamSynchronize(0));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
    {
        std::lock_guard<std::mutex> g(mFrameLock);

        mSavedDecodedFrames.push_back(MediaSnapshotState::FrameInfo{
                std::move(myFrame),
                std::vector<uint32_t>{texFrame.Ytex, texFrame.UVtex},
                (int)mOutputWidth, (int)mOutputHeight, myOutputPts,
                ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
                             mColorSpace}});
    }
    ++mNumOutputFrame;
    CUDA_DPRINT("successfully called.");
    return 1;
}

}  // namespace emulation
}  // namespace android