FFmpeg  4.0
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
27 
28 #define CUDA_FRAME_ALIGNMENT 256
29 
30 typedef struct CUDAFramesContext {
33 
34 static const enum AVPixelFormat supported_formats[] = {
41 };
42 
44  const void *hwconfig,
45  AVHWFramesConstraints *constraints)
46 {
47  int i;
48 
50  sizeof(*constraints->valid_sw_formats));
51  if (!constraints->valid_sw_formats)
52  return AVERROR(ENOMEM);
53 
54  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
55  constraints->valid_sw_formats[i] = supported_formats[i];
56  constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
57 
58  constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
59  if (!constraints->valid_hw_formats)
60  return AVERROR(ENOMEM);
61 
62  constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
63  constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
64 
65  return 0;
66 }
67 
68 static void cuda_buffer_free(void *opaque, uint8_t *data)
69 {
70  AVHWFramesContext *ctx = opaque;
71  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
72  CudaFunctions *cu = hwctx->internal->cuda_dl;
73 
74  CUcontext dummy;
75 
76  cu->cuCtxPushCurrent(hwctx->cuda_ctx);
77 
78  cu->cuMemFree((CUdeviceptr)data);
79 
80  cu->cuCtxPopCurrent(&dummy);
81 }
82 
83 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
84 {
85  AVHWFramesContext *ctx = opaque;
86  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
87  CudaFunctions *cu = hwctx->internal->cuda_dl;
88 
89  AVBufferRef *ret = NULL;
90  CUcontext dummy = NULL;
91  CUdeviceptr data;
92  CUresult err;
93 
94  err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
95  if (err != CUDA_SUCCESS) {
96  av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
97  return NULL;
98  }
99 
100  err = cu->cuMemAlloc(&data, size);
101  if (err != CUDA_SUCCESS)
102  goto fail;
103 
104  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
105  if (!ret) {
106  cu->cuMemFree(data);
107  goto fail;
108  }
109 
110 fail:
111  cu->cuCtxPopCurrent(&dummy);
112  return ret;
113 }
114 
116 {
117  CUDAFramesContext *priv = ctx->internal->priv;
118  int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
119  int i;
120 
121  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
122  if (ctx->sw_format == supported_formats[i])
123  break;
124  }
125  if (i == FF_ARRAY_ELEMS(supported_formats)) {
126  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
128  return AVERROR(ENOSYS);
129  }
130 
132 
133  if (!ctx->pool) {
134  int size;
135 
136  switch (ctx->sw_format) {
137  case AV_PIX_FMT_NV12:
138  case AV_PIX_FMT_YUV420P:
139  size = aligned_width * ctx->height * 3 / 2;
140  break;
141  case AV_PIX_FMT_YUV444P:
142  case AV_PIX_FMT_P010:
143  case AV_PIX_FMT_P016:
144  size = aligned_width * ctx->height * 3;
145  break;
147  size = aligned_width * ctx->height * 6;
148  break;
149  default:
150  av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
151  return AVERROR_BUG;
152  }
153 
155  if (!ctx->internal->pool_internal)
156  return AVERROR(ENOMEM);
157  }
158 
159  return 0;
160 }
161 
163 {
164  int aligned_width;
165  int width_in_bytes = ctx->width;
166 
167  if (ctx->sw_format == AV_PIX_FMT_P010 ||
168  ctx->sw_format == AV_PIX_FMT_P016 ||
170  width_in_bytes *= 2;
171  }
172  aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
173 
174  frame->buf[0] = av_buffer_pool_get(ctx->pool);
175  if (!frame->buf[0])
176  return AVERROR(ENOMEM);
177 
178  switch (ctx->sw_format) {
179  case AV_PIX_FMT_NV12:
180  case AV_PIX_FMT_P010:
181  case AV_PIX_FMT_P016:
182  frame->data[0] = frame->buf[0]->data;
183  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
184  frame->linesize[0] = aligned_width;
185  frame->linesize[1] = aligned_width;
186  break;
187  case AV_PIX_FMT_YUV420P:
188  frame->data[0] = frame->buf[0]->data;
189  frame->data[2] = frame->data[0] + aligned_width * ctx->height;
190  frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
191  frame->linesize[0] = aligned_width;
192  frame->linesize[1] = aligned_width / 2;
193  frame->linesize[2] = aligned_width / 2;
194  break;
195  case AV_PIX_FMT_YUV444P:
197  frame->data[0] = frame->buf[0]->data;
198  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
199  frame->data[2] = frame->data[1] + aligned_width * ctx->height;
200  frame->linesize[0] = aligned_width;
201  frame->linesize[1] = aligned_width;
202  frame->linesize[2] = aligned_width;
203  break;
204  default:
205  av_frame_unref(frame);
206  return AVERROR_BUG;
207  }
208 
209  frame->format = AV_PIX_FMT_CUDA;
210  frame->width = ctx->width;
211  frame->height = ctx->height;
212 
213  return 0;
214 }
215 
218  enum AVPixelFormat **formats)
219 {
220  enum AVPixelFormat *fmts;
221 
222  fmts = av_malloc_array(2, sizeof(*fmts));
223  if (!fmts)
224  return AVERROR(ENOMEM);
225 
226  fmts[0] = ctx->sw_format;
227  fmts[1] = AV_PIX_FMT_NONE;
228 
229  *formats = fmts;
230 
231  return 0;
232 }
233 
235  const AVFrame *src)
236 {
237  CUDAFramesContext *priv = ctx->internal->priv;
238  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
239  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
240 
241  CUcontext dummy;
242  CUresult err;
243  int i;
244 
245  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
246  if (err != CUDA_SUCCESS)
247  return AVERROR_UNKNOWN;
248 
249  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
250  CUDA_MEMCPY2D cpy = {
251  .srcMemoryType = CU_MEMORYTYPE_DEVICE,
252  .dstMemoryType = CU_MEMORYTYPE_HOST,
253  .srcDevice = (CUdeviceptr)src->data[i],
254  .dstHost = dst->data[i],
255  .srcPitch = src->linesize[i],
256  .dstPitch = dst->linesize[i],
257  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
258  .Height = src->height >> (i ? priv->shift_height : 0),
259  };
260 
261  err = cu->cuMemcpy2D(&cpy);
262  if (err != CUDA_SUCCESS) {
263  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
264  return AVERROR_UNKNOWN;
265  }
266  }
267 
268  cu->cuCtxPopCurrent(&dummy);
269 
270  return 0;
271 }
272 
274  const AVFrame *src)
275 {
276  CUDAFramesContext *priv = ctx->internal->priv;
277  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
278  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
279 
280  CUcontext dummy;
281  CUresult err;
282  int i;
283 
284  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
285  if (err != CUDA_SUCCESS)
286  return AVERROR_UNKNOWN;
287 
288  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
289  CUDA_MEMCPY2D cpy = {
290  .srcMemoryType = CU_MEMORYTYPE_HOST,
291  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
292  .srcHost = src->data[i],
293  .dstDevice = (CUdeviceptr)dst->data[i],
294  .srcPitch = src->linesize[i],
295  .dstPitch = dst->linesize[i],
296  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
297  .Height = src->height >> (i ? priv->shift_height : 0),
298  };
299 
300  err = cu->cuMemcpy2D(&cpy);
301  if (err != CUDA_SUCCESS) {
302  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
303  return AVERROR_UNKNOWN;
304  }
305  }
306 
307  cu->cuCtxPopCurrent(&dummy);
308 
309  return 0;
310 }
311 
313 {
314  AVCUDADeviceContext *hwctx = ctx->hwctx;
315 
316  if (hwctx->internal) {
317  if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
318  hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
319  hwctx->cuda_ctx = NULL;
320  }
321  cuda_free_functions(&hwctx->internal->cuda_dl);
322  }
323 
324  av_freep(&hwctx->internal);
325 }
326 
328 {
329  AVCUDADeviceContext *hwctx = ctx->hwctx;
330  int ret;
331 
332  if (!hwctx->internal) {
333  hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
334  if (!hwctx->internal)
335  return AVERROR(ENOMEM);
336  }
337 
338  if (!hwctx->internal->cuda_dl) {
339  ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
340  if (ret < 0) {
341  av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
342  goto error;
343  }
344  }
345 
346  return 0;
347 
348 error:
349  cuda_device_uninit(ctx);
350  return ret;
351 }
352 
353 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
354  AVDictionary *opts, int flags)
355 {
356  AVCUDADeviceContext *hwctx = ctx->hwctx;
357  CudaFunctions *cu;
358  CUdevice cu_device;
359  CUcontext dummy;
360  CUresult err;
361  int device_idx = 0;
362 
363  if (device)
364  device_idx = strtol(device, NULL, 0);
365 
366  if (cuda_device_init(ctx) < 0)
367  goto error;
368 
369  cu = hwctx->internal->cuda_dl;
370 
371  err = cu->cuInit(0);
372  if (err != CUDA_SUCCESS) {
373  av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
374  goto error;
375  }
376 
377  err = cu->cuDeviceGet(&cu_device, device_idx);
378  if (err != CUDA_SUCCESS) {
379  av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
380  goto error;
381  }
382 
383  err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
384  if (err != CUDA_SUCCESS) {
385  av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
386  goto error;
387  }
388 
389  cu->cuCtxPopCurrent(&dummy);
390 
391  hwctx->internal->is_allocated = 1;
392 
393  return 0;
394 
395 error:
396  cuda_device_uninit(ctx);
397  return AVERROR_UNKNOWN;
398 }
399 
402  .name = "CUDA",
403 
404  .device_hwctx_size = sizeof(AVCUDADeviceContext),
405  .frames_priv_size = sizeof(CUDAFramesContext),
406 
407  .device_create = cuda_device_create,
408  .device_init = cuda_device_init,
409  .device_uninit = cuda_device_uninit,
410  .frames_get_constraints = cuda_frames_get_constraints,
411  .frames_init = cuda_frames_init,
412  .frames_get_buffer = cuda_get_buffer,
413  .transfer_get_formats = cuda_transfer_get_formats,
414  .transfer_data_to = cuda_transfer_data_to,
415  .transfer_data_from = cuda_transfer_data_from,
416 
417  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
418 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:60
#define NULL
Definition: coverity.c:32
int size
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:67
Memory handling functions.
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:410
AVCUDADeviceContextInternal * internal
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:228
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
static int cuda_frames_init(AVHWFramesContext *ctx)
#define src
Definition: vp8dsp.c:254
#define AV_PIX_FMT_P016
Definition: pixfmt.h:414
#define AV_PIX_FMT_P010
Definition: pixfmt.h:413
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:91
const char data[16]
Definition: mxf.c:90
static int flags
Definition: log.c:55
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:381
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
static void cuda_buffer_free(void *opaque, uint8_t *data)
int width
Definition: frame.h:276
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2391
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:85
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
static int cuda_device_init(AVHWDeviceContext *ctx)
#define fail()
Definition: checkasm.h:116
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:148
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
AVFormatContext * ctx
Definition: movenc.c:48
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
HW acceleration through CUDA.
Definition: pixfmt.h:231
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:218
static void error(const char *err)
#define FF_ARRAY_ELEMS(a)
#define CUDA_FRAME_ALIGNMENT
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:291
This struct describes the constraints on hardware frames attached to a given device with a hardware-s...
Definition: hwcontext.h:432
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:249
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:123
refcounted data buffer API
enum AVPixelFormat * valid_hw_formats
A list of possible values for format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:437
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:133
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:551
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:62
common internal and external API header
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:394
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:189
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags)
enum AVPixelFormat * valid_sw_formats
A list of possible values for sw_format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:444
int height
Definition: frame.h:276
#define av_freep(p)
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:334
#define av_malloc_array(a, b)
formats
Definition: signature.h:48
static void cuda_device_uninit(AVHWDeviceContext *ctx)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2279
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:221
AVPixelFormat
Pixel format.
Definition: pixfmt.h:60