| // Copyright (C) 2019 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "host-common/MediaH264DecoderCuvid.h" |
| #include "host-common/H264NaluParser.h" |
| #include "host-common/YuvConverter.h" |
| #include "android/main-emugl.h" |
// Dependencies of MediaH264DecoderCuvid.h:
| #include <cstdint> |
| #include <string> |
| #include <vector> |
| |
| #ifdef _WIN32 |
| #define WIN32_LEAN_AND_MEAN 1 |
| #include <windows.h> |
| #include <winioctl.h> |
| #endif |
| |
| #include <stdio.h> |
| #include <string.h> |
| |
| extern "C" { |
| #define INIT_CUDA_GL 1 |
| #include "host-common/dynlink_cuda.h" |
| #include "host-common/dynlink_cudaGL.h" |
| #include "host-common/dynlink_nvcuvid.h" |
| } |
| #define MEDIA_H264_DEBUG 0 |
| |
| #if MEDIA_H264_DEBUG |
#define H264_DPRINT(fmt, ...)                                               \
    fprintf(stderr, "h264-cuvid-dec: %s:%d " fmt "\n", __func__, __LINE__,  \
            ##__VA_ARGS__)
| #else |
| #define H264_DPRINT(fmt, ...) |
| #endif |
| |
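// Wraps CUVID/CUDA calls: a failing call is logged via H264_DPRINT (debug
// builds only) but is not treated as fatal, so decoding proceeds best-effort.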
| #define NVDEC_API_CALL(cuvidAPI) \ |
| do { \ |
| CUresult errorCode = cuvidAPI; \ |
| if (errorCode != CUDA_SUCCESS) { \ |
| H264_DPRINT("%s failed with error code %d\n", #cuvidAPI, \ |
| (int)errorCode); \ |
| } \ |
| } while (0) |
| |
| namespace android { |
| namespace emulation { |
| |
| using InitContextParam = H264PingInfoParser::InitContextParam; |
| using DecodeFrameParam = H264PingInfoParser::DecodeFrameParam; |
| using ResetParam = H264PingInfoParser::ResetParam; |
| using GetImageParam = H264PingInfoParser::GetImageParam; |
| using TextureFrame = MediaHostRenderer::TextureFrame; |
| |
| MediaH264DecoderCuvid::MediaH264DecoderCuvid(uint64_t id, |
| H264PingInfoParser parser) |
| : mId(id), mParser(parser) { |
| auto useGpuTextureEnv = android::base::System::getEnvironmentVariable( |
| "ANDROID_EMU_CODEC_USE_GPU_TEXTURE"); |
| if (useGpuTextureEnv != "") { |
| if (mParser.version() == 200) { |
| if (emuglConfig_get_current_renderer() == SELECTED_RENDERER_HOST) { |
| mUseGpuTexture = true; |
| } else { |
| H264_DPRINT( |
| "cannot use gpu texture to save decoded frame in " |
| "non-host gpu mode"); |
| if (emuglConfig_get_current_renderer() == |
| SELECTED_RENDERER_SWIFTSHADER_INDIRECT) { |
| H264_DPRINT("your gpu mode is: swiftshader_indirect"); |
| } |
| } |
| } |
| } |
| }; |
| |
MediaH264DecoderPlugin* MediaH264DecoderCuvid::clone() {
    return new MediaH264DecoderCuvid(mId, mParser);
}
| |
| MediaH264DecoderCuvid::~MediaH264DecoderCuvid() { |
| destroyH264Context(); |
| } |
| |
| void MediaH264DecoderCuvid::reset(void* ptr) { |
| destroyH264Context(); |
| ResetParam param{}; |
| mParser.parseResetParams(ptr, param); |
| initH264ContextInternal(param.width, param.height, param.outputWidth, |
| param.outputHeight, param.outputPixelFormat); |
| } |
| |
| void MediaH264DecoderCuvid::initH264Context(void* ptr) { |
| InitContextParam param{}; |
| mParser.parseInitContextParams(ptr, param); |
| initH264ContextInternal(param.width, param.height, param.outputWidth, |
| param.outputHeight, param.outputPixelFormat); |
| } |
| |
| void MediaH264DecoderCuvid::initH264ContextInternal(unsigned int width, |
| unsigned int height, |
| unsigned int outWidth, |
| unsigned int outHeight, |
| PixelFormat outPixFmt) { |
| if (!initCudaDrivers()) { |
| H264_DPRINT("Failed to initH264Context because driver is not working"); |
| return; |
| } |
| |
| if (mCudaContext != nullptr) { |
| destroyH264Context(); |
| } |
| H264_DPRINT("%s(w=%u h=%u out_w=%u out_h=%u pixfmt=%u)", __func__, width, |
| height, outWidth, outHeight, (uint8_t)outPixFmt); |
| mWidth = width; |
| mHeight = height; |
| |
| mOutputWidth = outWidth; |
| mOutputHeight = outHeight; |
| mOutPixFmt = outPixFmt; |
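    // NV12 output: a full-resolution 8-bit Y plane plus interleaved UV at
    // quarter resolution, i.e. 3/2 bytes per pixel.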
| mOutBufferSize = outWidth * outHeight * 3 / 2; |
| |
    // CUDA device and context setup
| const int gpuIndex = 0; |
| const int cudaFlags = 0; |
| CUdevice cudaDevice = 0; |
| CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex); |
| if (myres != CUDA_SUCCESS) { |
| H264_DPRINT("Failed to get cuda device, error code %d", (int)myres); |
| return; |
| } |
| |
| char buf[1024]; |
| myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice); |
| if (myres != CUDA_SUCCESS) { |
| H264_DPRINT("Failed to get gpu device name, error code %d", (int)myres); |
| return; |
| } |
| |
| H264_DPRINT("using gpu device %s", buf); |
| |
| myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice); |
| if (myres != CUDA_SUCCESS) { |
| H264_DPRINT("Failed to create cuda context, error code %d", (int)myres); |
| } |
| |
| NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext)); |
| |
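    // The parser drives the decode pipeline through three callbacks:
    // HandleVideoSequence fires on a new SPS and (re)creates the decoder,
    // HandlePictureDecode submits one picture's parameters to NVDEC, and
    // HandlePictureDisplay delivers decoded frames in display order.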
| CUVIDPARSERPARAMS videoParserParameters = {}; |
| videoParserParameters.CodecType = cudaVideoCodec_H264; |
| videoParserParameters.ulMaxNumDecodeSurfaces = 1; |
| videoParserParameters.ulMaxDisplayDelay = 1; |
| videoParserParameters.pUserData = this; |
| videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc; |
| videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc; |
| videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc; |
| NVDEC_API_CALL( |
| cuvidCreateVideoParser(&mCudaParser, &videoParserParameters)); |
| |
| H264_DPRINT("Successfully created cuda context %p", mCudaContext); |
| } |
| |
| void MediaH264DecoderCuvid::destroyH264Context() { |
| H264_DPRINT("destroyH264Context calling"); |
| |
| for (auto texFrame : mSavedTexFrames) { |
| mRenderer.putTextureFrame(texFrame); |
| } |
| mRenderer.cleanUpTextures(); |
| mSavedTexFrames.clear(); |
| if (mCudaContext != nullptr) { |
| NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext)); |
| if (mCudaParser != nullptr) { |
| NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser)); |
| mCudaParser = nullptr; |
| } |
| |
| if (mCudaDecoder != nullptr) { |
| NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder)); |
| mCudaDecoder = nullptr; |
| } |
| NVDEC_API_CALL(cuCtxPopCurrent(NULL)); |
| NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock)); |
| } |
| |
| if (mCudaContext != nullptr) { |
| CUresult myres = cuCtxDestroy(mCudaContext); |
| if (myres != CUDA_SUCCESS) { |
| H264_DPRINT("Failed to destroy cuda context; error code %d", |
| (int)myres); |
| } |
| mCudaContext = nullptr; |
| } |
| } |
| |
| void MediaH264DecoderCuvid::decodeFrame(void* ptr) { |
| DecodeFrameParam param{}; |
| mParser.parseDecodeFrameParams(ptr, param); |
| |
| const uint8_t* frame = param.pData; |
| size_t szBytes = param.size; |
| uint64_t inputPts = param.pts; |
| |
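    // Snapshot bookkeeping: remember the latest SPS/PPS and the packets since
    // the last I-frame so load() can replay them to rebuild decoder state.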
| const bool enableSnapshot = true; |
| if (enableSnapshot) { |
| std::vector<uint8_t> v; |
| v.assign(frame, frame + szBytes); |
| bool hasSps = H264NaluParser::checkSpsFrame(frame, szBytes); |
| if (hasSps) { |
| mSnapshotState = SnapshotState{}; |
| mSnapshotState.saveSps(v); |
| } else { |
| bool hasPps = H264NaluParser::checkPpsFrame(frame, szBytes); |
| if (hasPps) { |
| mSnapshotState.savePps(v); |
| mSnapshotState.savedPackets.clear(); |
| mSnapshotState.savedDecodedFrame.data.clear(); |
| } else { |
| bool isIFrame = H264NaluParser::checkIFrame(frame, szBytes); |
| if (isIFrame) { |
| mSnapshotState.savedPackets.clear(); |
| } |
| mSnapshotState.savePacket(std::move(v), inputPts); |
| H264_DPRINT("saving packet; total is %d", |
| (int)(mSnapshotState.savedPackets.size())); |
| } |
| } |
| } |
| |
| decodeFrameInternal(param.pConsumedBytes, param.pDecoderErrorCode, frame, |
| szBytes, inputPts); |
| } |
| |
| void MediaH264DecoderCuvid::decodeFrameInternal(uint64_t* pRetSzBytes, |
| int32_t* pRetErr, |
| const uint8_t* frame, |
| size_t szBytes, |
| uint64_t inputPts) { |
| mIsInFlush = false; |
| H264_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes); |
| Err h264Err = Err::NoErr; |
| |
| CUVIDSOURCEDATAPACKET packet = {0}; |
| packet.payload = frame; |
| packet.payload_size = szBytes; |
| packet.flags = CUVID_PKT_TIMESTAMP; |
| packet.timestamp = inputPts; |
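    // An empty payload with CUVID_PKT_ENDOFSTREAM tells the parser to flush
    // any pictures it is still holding.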
| if (!frame || szBytes == 0) { |
| packet.flags |= CUVID_PKT_ENDOFSTREAM; |
| } |
| NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet)); |
| if (pRetSzBytes) { |
| *pRetSzBytes = szBytes; |
| } |
| if (pRetErr) { |
| *pRetErr = (int32_t)h264Err; |
| } |
| } |
| |
| void MediaH264DecoderCuvid::doFlush() { |
| if (!mIsInFlush) { |
| return; |
| } |
| H264_DPRINT("started flushing"); |
| CUVIDSOURCEDATAPACKET packet = {0}; |
| packet.payload = NULL; |
| packet.payload_size = 0; |
| packet.flags |= CUVID_PKT_ENDOFSTREAM; |
| NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet)); |
| H264_DPRINT("done one flushing"); |
| } |
| |
| void MediaH264DecoderCuvid::flush(void* ptr) { |
| mIsInFlush = true; |
| doFlush(); |
| } |
| |
| void MediaH264DecoderCuvid::getImage(void* ptr) { |
| H264_DPRINT("getImage %p", ptr); |
| GetImageParam param{}; |
| mParser.parseGetImageParams(ptr, param); |
| |
| int* retErr = param.pDecoderErrorCode; |
| uint32_t* retWidth = param.pRetWidth; |
| uint32_t* retHeight = param.pRetHeight; |
| uint64_t* retPts = param.pRetPts; |
| uint32_t* retColorPrimaries = param.pRetColorPrimaries; |
| uint32_t* retColorRange = param.pRetColorRange; |
| uint32_t* retColorTransfer = param.pRetColorTransfer; |
| uint32_t* retColorSpace = param.pRetColorSpace; |
| |
| static int numbers = 0; |
| H264_DPRINT("calling getImage %d colorbuffer %d", numbers++, |
| (int)param.hostColorBufferId); |
| doFlush(); |
| uint8_t* dst = param.pDecodedFrame; |
| int myOutputWidth = mOutputWidth; |
| int myOutputHeight = mOutputHeight; |
| std::vector<uint8_t> decodedFrame; |
| TextureFrame decodedTexFrame; |
| { |
| std::lock_guard<std::mutex> g(mFrameLock); |
| mImageReady = !mSavedFrames.empty(); |
| if (!mImageReady) { |
| H264_DPRINT("%s: no new frame yet", __func__); |
| *retErr = static_cast<int>(Err::NoDecodedFrame); |
| return; |
| } |
| |
| std::vector<uint8_t>& myFrame = mSavedFrames.front(); |
| std::swap(decodedFrame, myFrame); |
| decodedTexFrame = mSavedTexFrames.front(); |
| mOutputPts = mSavedPts.front(); |
| |
| myOutputWidth = mSavedW.front(); |
| myOutputHeight = mSavedH.front(); |
| *retWidth = myOutputWidth; |
| *retHeight = myOutputHeight; |
| |
| mSavedFrames.pop_front(); |
| mSavedTexFrames.pop_front(); |
| mSavedPts.pop_front(); |
| mSavedW.pop_front(); |
| mSavedH.pop_front(); |
| } |
| |
| bool needToCopyToGuest = true; |
| |
| if (mUseGpuTexture) { |
| needToCopyToGuest = false; |
| } else { |
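        // The guest expects planar YUV420; UVInterleavedToPlanar deinterleaves
        // NV12's UV plane into separate U and V planes in place.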
| YuvConverter<uint8_t> convert8(myOutputWidth, myOutputHeight); |
| convert8.UVInterleavedToPlanar(decodedFrame.data()); |
| } |
| |
| if (mParser.version() == 200) { |
| if (param.hostColorBufferId >= 0) { |
| needToCopyToGuest = false; |
| if (mUseGpuTexture) { |
| mRenderer.renderToHostColorBufferWithTextures( |
| param.hostColorBufferId, myOutputWidth, myOutputHeight, |
| decodedTexFrame); |
| } else { |
| mRenderer.renderToHostColorBuffer(param.hostColorBufferId, |
| myOutputWidth, myOutputHeight, |
| decodedFrame.data()); |
| } |
| } else { |
| if (mUseGpuTexture) { |
| // no colorbuffer to send the textures to, just recycle |
| // them back to Renderer |
| mRenderer.putTextureFrame(decodedTexFrame); |
| } |
| } |
| } |
| |
| if (needToCopyToGuest) { |
| memcpy(dst, decodedFrame.data(), |
| myOutputHeight * myOutputWidth * 3 / 2); |
| } |
| |
| mImageReady = false; |
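    // By convention the "error" slot carries the decoded frame's byte count
    // on success.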
| *retErr = myOutputHeight * myOutputWidth * 3 / 2; |
| *retPts = mOutputPts; |
| *retColorPrimaries = mColorPrimaries; |
| *retColorRange = mColorRange; |
| *retColorTransfer = mColorTransfer; |
| *retColorSpace = mColorSpace; |
| H264_DPRINT("Frame primary %d range %d transfer %d space %d", |
| (int)mColorPrimaries, (int)mColorRange, (int)mColorTransfer, |
| (int)mColorSpace); |
| H264_DPRINT("Copying completed pts %lld", (long long)mOutputPts); |
| } |
| |
| bool MediaH264DecoderCuvid::initCudaDrivers() { |
| if (s_isCudaInitialized) { |
| return true; |
| } |
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) |
| typedef HMODULE CUDADRIVER; |
| #else |
| typedef void* CUDADRIVER; |
| #endif |
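    // These are the dynlink variants from dynlink_cuda.h / dynlink_nvcuvid.h:
    // cuInit() here also loads the CUDA driver library at runtime, which is
    // why it takes the extra version and driver-handle arguments.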
| CUDADRIVER hHandleDriver = 0; |
| if (CUDA_SUCCESS != cuInit(0, __CUDA_API_VERSION, hHandleDriver)) { |
| fprintf(stderr, |
| "Failed to call cuInit, cannot use nvidia cuvid decoder for " |
| "h264 stream\n"); |
| return false; |
| } |
| if (CUDA_SUCCESS != cuvidInit(0)) { |
| fprintf(stderr, |
| "Failed to call cuvidInit, cannot use nvidia cuvid decoder for " |
| "h264 stream\n"); |
| return false; |
| } |
| |
| int numGpuCards = 0; |
| CUresult myres = cuDeviceGetCount(&numGpuCards); |
| if (myres != CUDA_SUCCESS) { |
| H264_DPRINT( |
| "Failed to get number of GPU cards installed on host; error " |
| "code %d", |
| (int)myres); |
| return false; |
| } |
| |
| if (numGpuCards <= 0) { |
| H264_DPRINT("There are no nvidia GPU cards on this host."); |
| return false; |
| } |
| |
    // Luckily, CUDA is now initialized.
    s_isCudaInitialized = true;
| |
| return true; |
| } |
| |
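// Parser sequence callback: invoked when a new SPS arrives. The return value
// becomes the number of decode surfaces the parser assumes, which is why the
// early-exit paths below still return nDecodeSurface.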
| int MediaH264DecoderCuvid::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) { |
| int nDecodeSurface = 8; // need 8 for 4K video |
| |
| CUVIDDECODECAPS decodecaps; |
| memset(&decodecaps, 0, sizeof(decodecaps)); |
| |
| decodecaps.eCodecType = pVideoFormat->codec; |
| decodecaps.eChromaFormat = pVideoFormat->chroma_format; |
| decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; |
| |
| NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext)); |
| NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps)); |
| NVDEC_API_CALL(cuCtxPopCurrent(NULL)); |
| |
| if (!decodecaps.bIsSupported) { |
| H264_DPRINT("Codec not supported on this GPU."); |
| return nDecodeSurface; |
| } |
| |
| if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) || |
| (pVideoFormat->coded_height > decodecaps.nMaxHeight)) { |
| H264_DPRINT("Resolution not supported on this GPU"); |
| return nDecodeSurface; |
| } |
| |
| if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) > |
| decodecaps.nMaxMBCount) { |
| H264_DPRINT("MBCount not supported on this GPU"); |
| return nDecodeSurface; |
| } |
| |
| mLumaWidth = |
| pVideoFormat->display_area.right - pVideoFormat->display_area.left; |
| mLumaHeight = |
| pVideoFormat->display_area.bottom - pVideoFormat->display_area.top; |
    mChromaHeight = mLumaHeight / 2;  // NV12: chroma plane is half the luma height
| mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1; |
| |
| if (pVideoFormat->video_signal_description.video_full_range_flag) |
| mColorRange = 2; |
| else |
| mColorRange = 0; |
| |
| mColorPrimaries = pVideoFormat->video_signal_description.color_primaries; |
| mColorTransfer = |
| pVideoFormat->video_signal_description.transfer_characteristics; |
| mColorSpace = pVideoFormat->video_signal_description.matrix_coefficients; |
| |
| CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0}; |
| videoDecodeCreateInfo.CodecType = pVideoFormat->codec; |
| videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format; |
| videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12; |
| H264_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat); |
| videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; |
| if (pVideoFormat->progressive_sequence) |
| videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; |
| else |
| videoDecodeCreateInfo.DeinterlaceMode = |
| cudaVideoDeinterlaceMode_Adaptive; |
| videoDecodeCreateInfo.ulNumOutputSurfaces = 1; |
| // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by |
| // NVDEC hardware |
| videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; |
| videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface; |
| videoDecodeCreateInfo.vidLock = mCtxLock; |
| videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width; |
| videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height; |
| if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) { |
| H264_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight); |
| mOutputWidth = mLumaWidth; |
| mOutputHeight = mLumaHeight; |
| H264_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight); |
| unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2; |
| if (mOutBufferSize < newOutBufferSize) { |
| mOutBufferSize = newOutBufferSize; |
| } |
| } |
| |
| videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width; |
| videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height; |
| |
| mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth; |
| mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight; |
| |
| NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext)); |
| if (mCudaDecoder != nullptr) { |
| NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder)); |
| mCudaDecoder = nullptr; |
| } |
| { |
| size_t free, total; |
| cuMemGetInfo(&free, &total); |
| H264_DPRINT("free memory %g M, total %g M", free / 1048576.0, |
| total / 1048576.0); |
| } |
| NVDEC_API_CALL(cuCtxPopCurrent(NULL)); |
| NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext)); |
| NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo)); |
| NVDEC_API_CALL(cuCtxPopCurrent(NULL)); |
| H264_DPRINT("successfully called. decoder %p", mCudaDecoder); |
| return nDecodeSurface; |
| } |
| |
| int MediaH264DecoderCuvid::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) { |
| NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams)); |
| H264_DPRINT("successfully called."); |
| return 1; |
| } |
| |
| extern "C" { |
| |
| #define MEDIA_H264_COPY_Y_TEXTURE 1 |
| #define MEDIA_H264_COPY_UV_TEXTURE 2 |
| |
| struct h264_cuvid_copy_context { |
| CUdeviceptr src_frame; |
| unsigned int src_pitch; |
| |
    // This is usually >= dest_height due to padding, e.g.
    // src_surface_height: 1088, dest_height: 1080.
    // So, when copying UV data, the source has to start at
    // offset = src_pitch * src_surface_height.
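    // For example (hypothetical numbers): a 1920x1080 NV12 frame mapped with
    // src_pitch = 2048 and src_surface_height = 1088 keeps its UV plane at
    // byte offset 2048 * 1088 = 2228224, not 2048 * 1080.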
| unsigned int src_surface_height; |
| |
| unsigned int dest_width; |
| unsigned int dest_height; |
| }; |
| |
| void cuda_copy_decoded_frame(void* privData, |
| int mode, |
| uint32_t dest_texture_handle) { |
| h264_cuvid_copy_context* copy_context = |
| static_cast<h264_cuvid_copy_context*>(privData); |
| |
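    // Local copies of the GL and CUDA enum values so this translation unit
    // does not need the GL headers.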
| const unsigned int GL_TEXTURE_2D = 0x0DE1; |
| const unsigned int cudaGraphicsMapFlagsNone = 0x0; |
| CUgraphicsResource CudaRes{0}; |
| H264_DPRINT("cuda copy decoded frame testure %d", (int)dest_texture_handle); |
| NVDEC_API_CALL(cuGraphicsGLRegisterImage(&CudaRes, dest_texture_handle, |
| GL_TEXTURE_2D, 0x0)); |
| CUarray texture_ptr; |
| NVDEC_API_CALL(cuGraphicsMapResources(1, &CudaRes, 0)); |
| NVDEC_API_CALL( |
| cuGraphicsSubResourceGetMappedArray(&texture_ptr, CudaRes, 0, 0)); |
| CUdeviceptr dpSrcFrame = copy_context->src_frame; |
| CUDA_MEMCPY2D m = {0}; |
| m.srcMemoryType = CU_MEMORYTYPE_DEVICE; |
| m.srcDevice = dpSrcFrame; |
| m.srcPitch = copy_context->src_pitch; |
| m.dstMemoryType = CU_MEMORYTYPE_ARRAY; |
| m.dstArray = texture_ptr; |
| m.dstPitch = copy_context->dest_width * 1; |
| m.WidthInBytes = copy_context->dest_width * 1; |
| m.Height = copy_context->dest_height; |
| H264_DPRINT("dstPitch %d, WidthInBytes %d Height %d surface-height %d", |
| (int)m.dstPitch, (int)m.WidthInBytes, (int)m.Height, |
| (int)copy_context->src_surface_height); |
| |
| if (mode == MEDIA_H264_COPY_Y_TEXTURE) { // copy Y data |
| NVDEC_API_CALL(cuMemcpy2D(&m)); |
| } else if (mode == MEDIA_H264_COPY_UV_TEXTURE) { // copy UV data |
| m.srcDevice = |
| (CUdeviceptr)((uint8_t*)dpSrcFrame + |
| m.srcPitch * copy_context->src_surface_height); |
| m.Height = m.Height / 2; |
| NVDEC_API_CALL(cuMemcpy2D(&m)); |
| } |
| NVDEC_API_CALL(cuGraphicsUnmapResources(1, &CudaRes, 0)); |
| NVDEC_API_CALL(cuGraphicsUnregisterResource(CudaRes)); |
| } |
| |
| void cuda_nv12_updater(void* privData, uint32_t type, uint32_t* textures) { |
| constexpr uint32_t kFRAMEWORK_FORMAT_NV12 = 3; |
| if (type != kFRAMEWORK_FORMAT_NV12) { |
| return; |
| } |
| H264_DPRINT("copyiong Ytex %d", textures[0]); |
| H264_DPRINT("copyiong UVtex %d", textures[1]); |
| cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_Y_TEXTURE, textures[0]); |
| cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_UV_TEXTURE, textures[1]); |
| } |
| |
}  // end extern "C"
| |
| int MediaH264DecoderCuvid::HandlePictureDisplay( |
| CUVIDPARSERDISPINFO* pDispInfo) { |
| if (mIsLoadingFromSnapshot) { |
| return 1; |
| } |
| |
| CUVIDPROCPARAMS videoProcessingParameters = {}; |
| videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame; |
| videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1; |
| videoProcessingParameters.top_field_first = pDispInfo->top_field_first; |
| videoProcessingParameters.unpaired_field = |
| pDispInfo->repeat_first_field < 0; |
| videoProcessingParameters.output_stream = 0; |
| uint64_t myOutputPts = pDispInfo->timestamp; |
| |
| CUdeviceptr dpSrcFrame = 0; |
| unsigned int nSrcPitch = 0; |
| NVDEC_API_CALL(cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index, |
| &dpSrcFrame, &nSrcPitch, |
| &videoProcessingParameters)); |
| |
| NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext)); |
| unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2; |
| std::vector<uint8_t> myFrame; |
| TextureFrame texFrame; |
| if (mUseGpuTexture) { |
| h264_cuvid_copy_context my_copy_context{ |
| .src_frame = dpSrcFrame, |
| .src_pitch = nSrcPitch, |
| .src_surface_height = mSurfaceHeight, |
| .dest_width = mOutputWidth, |
| .dest_height = mOutputHeight, |
| }; |
| texFrame = mRenderer.getTextureFrame(mOutputWidth, mOutputHeight); |
| mRenderer.saveDecodedFrameToTexture(texFrame, &my_copy_context, |
| (void*)cuda_nv12_updater); |
| } else { |
| myFrame.resize(newOutBufferSize); |
| uint8_t* pDecodedFrame = &(myFrame[0]); |
| |
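        // Two-pass 2D copy for NV12: the first pass copies the Y plane; the
        // second re-points srcDevice at the UV plane (src_pitch *
        // mSurfaceHeight bytes into the mapped frame) and halves the height.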
| CUDA_MEMCPY2D m = {0}; |
| m.srcMemoryType = CU_MEMORYTYPE_DEVICE; |
| m.srcDevice = dpSrcFrame; |
| m.srcPitch = nSrcPitch; |
| m.dstMemoryType = CU_MEMORYTYPE_HOST; |
| m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame); |
| m.dstPitch = mOutputWidth * mBPP; |
| m.WidthInBytes = mOutputWidth * mBPP; |
| m.Height = mLumaHeight; |
| H264_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d", |
| m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes, |
| (int)m.Height); |
| |
| NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0)); |
| |
| m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame + |
| m.srcPitch * mSurfaceHeight); |
| m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + |
| m.dstPitch * mLumaHeight); |
| m.Height = mChromaHeight; |
| NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0)); |
| } |
| |
| NVDEC_API_CALL(cuStreamSynchronize(0)); |
| NVDEC_API_CALL(cuCtxPopCurrent(NULL)); |
| |
| NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame)); |
| if (!mIsLoadingFromSnapshot) { |
| std::lock_guard<std::mutex> g(mFrameLock); |
| mSavedFrames.push_back(myFrame); |
| mSavedTexFrames.push_back(texFrame); |
| mSavedPts.push_back(myOutputPts); |
| mSavedW.push_back(mOutputWidth); |
| mSavedH.push_back(mOutputHeight); |
| } |
| mImageReady = true; |
| H264_DPRINT("successfully called."); |
| return 1; |
| } |
| |
| void MediaH264DecoderCuvid::oneShotDecode(std::vector<uint8_t>& data, |
| uint64_t pts) { |
| H264_DPRINT("decoding pts %lld", (long long)pts); |
| decodeFrameInternal(nullptr, nullptr, data.data(), data.size(), pts); |
| } |
| |
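// Snapshot support: save() drains any frames still queued for getImage() into
// mSnapshotState and serializes it; load() rebuilds the CUDA context, replays
// the saved SPS/PPS and packets through oneShotDecode(), and requeues the
// saved frames.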
| void MediaH264DecoderCuvid::save(base::Stream* stream) const { |
| stream->putBe32(mParser.version()); |
| const int useGpuTexture = mUseGpuTexture ? 1 : 0; |
| stream->putBe32(useGpuTexture); |
| |
| stream->putBe32(mWidth); |
| stream->putBe32(mHeight); |
| stream->putBe32(mOutputWidth); |
| stream->putBe32(mOutputHeight); |
| stream->putBe32((int)mOutPixFmt); |
| |
| const int hasContext = mCudaContext == nullptr ? 0 : 1; |
| stream->putBe32(hasContext); |
| |
| mSnapshotState.savedFrames.clear(); |
| mSnapshotState.savedDecodedFrame.data.clear(); |
    // Drain every queued frame; popping inside a size-indexed for loop would
    // skip half of them.
    while (!mSavedFrames.empty()) {
        const std::vector<uint8_t>& myFrame = mSavedFrames.front();
        int myOutputWidth = mSavedW.front();
        int myOutputHeight = mSavedH.front();
        uint64_t myOutputPts = mSavedPts.front();
| mSnapshotState.saveDecodedFrame( |
| myFrame, myOutputWidth, myOutputHeight, |
| ColorAspects{mColorPrimaries, mColorRange, mColorTransfer, |
| mColorSpace}, |
| myOutputPts); |
| mSavedFrames.pop_front(); |
| mSavedTexFrames.pop_front(); |
| mSavedW.pop_front(); |
| mSavedH.pop_front(); |
| mSavedPts.pop_front(); |
| } |
| H264_DPRINT("saving packets now %d", |
| (int)(mSnapshotState.savedPackets.size())); |
| mSnapshotState.save(stream); |
| } |
| |
| bool MediaH264DecoderCuvid::load(base::Stream* stream) { |
| mIsLoadingFromSnapshot = true; |
| uint32_t version = stream->getBe32(); |
| mParser = H264PingInfoParser{version}; |
| const int useGpuTexture = stream->getBe32(); |
    mUseGpuTexture = (useGpuTexture != 0);
| |
| mWidth = stream->getBe32(); |
| mHeight = stream->getBe32(); |
| mOutputWidth = stream->getBe32(); |
| mOutputHeight = stream->getBe32(); |
| mOutPixFmt = (PixelFormat)stream->getBe32(); |
| |
| const int hasContext = stream->getBe32(); |
| if (hasContext) { |
| initH264ContextInternal(mWidth, mHeight, mWidth, mHeight, mOutPixFmt); |
| } |
| |
| mSnapshotState.load(stream); |
| |
| H264_DPRINT("loaded packets %d, now restore decoder", |
| (int)(mSnapshotState.savedPackets.size())); |
| if (hasContext && mSnapshotState.sps.size() > 0) { |
| oneShotDecode(mSnapshotState.sps, 0); |
| if (mSnapshotState.pps.size() > 0) { |
| oneShotDecode(mSnapshotState.pps, 0); |
| if (mSnapshotState.savedPackets.size() > 0) { |
                for (size_t i = 0; i < mSnapshotState.savedPackets.size();
                     ++i) {
| PacketInfo& pkt = mSnapshotState.savedPackets[i]; |
| oneShotDecode(pkt.data, pkt.pts); |
| } |
| } |
| } |
| } |
| |
| mImageReady = false; |
| for (size_t i = 0; i < mSnapshotState.savedFrames.size(); ++i) { |
| auto& frame = mSnapshotState.savedFrames[i]; |
| mOutBufferSize = frame.data.size(); |
| mOutputWidth = frame.width; |
| mOutputHeight = frame.height; |
| mColorPrimaries = frame.color.primaries; |
| mColorRange = frame.color.range; |
| mColorTransfer = frame.color.transfer; |
| mColorSpace = frame.color.space; |
| mOutputPts = frame.pts; |
| mSavedFrames.push_back(frame.data); |
| TextureFrame texFrame = |
| mRenderer.getTextureFrame(mOutputWidth, mOutputHeight); |
| mSavedTexFrames.push_back(texFrame); |
| mSavedW.push_back(mOutputWidth); |
| mSavedH.push_back(mOutputHeight); |
| mSavedPts.push_back(mOutputPts); |
| mImageReady = true; |
| } |
| mIsLoadingFromSnapshot = false; |
| return true; |
| } |
| |
// static
bool MediaH264DecoderCuvid::s_isCudaInitialized = false;
| |
| } // namespace emulation |
| } // namespace android |