FFmpeg  4.0
vf_convolution_opencl.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Danil Iashchenko
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/common.h"
22 #include "libavutil/imgutils.h"
23 #include "libavutil/mem.h"
24 #include "libavutil/opt.h"
25 #include "libavutil/pixdesc.h"
26 #include "libavutil/avstring.h"
27 
28 
29 #include "avfilter.h"
30 #include "internal.h"
31 #include "opencl.h"
32 #include "opencl_source.h"
33 #include "video.h"
34 
35 typedef struct ConvolutionOpenCLContext {
37 
39  cl_kernel kernel;
40  cl_command_queue command_queue;
41 
42  char *matrix_str[4];
43 
44  cl_mem matrix[4];
45  cl_int matrix_sizes[4];
46  cl_int dims[4];
47  cl_float rdivs[4];
48  cl_float biases[4];
49 
51 
52 
54 {
56  cl_int cle;
57  int err;
58 
60  if (err < 0)
61  goto fail;
62 
63  ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
64  ctx->ocf.hwctx->device_id,
65  0, &cle);
66  if (!ctx->command_queue) {
67  av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
68  "command queue: %d.\n", cle);
69  err = AVERROR(EIO);
70  goto fail;
71  }
72 
73  ctx->kernel = clCreateKernel(ctx->ocf.program, "convolution_global", &cle);
74  if (!ctx->kernel) {
75  av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
76  err = AVERROR(EIO);
77  goto fail;
78  }
79 
80  ctx->initialised = 1;
81  return 0;
82 
83 fail:
84  if (ctx->command_queue)
85  clReleaseCommandQueue(ctx->command_queue);
86  if (ctx->kernel)
87  clReleaseKernel(ctx->kernel);
88  return err;
89 }
90 
91 
92 
94 {
96  float *matrix = NULL;
97  size_t matrix_bytes;
98  cl_mem buffer;
99  cl_int cle;
100  int i, j;
101  int sscanf_err;
102  char *p, *arg, *saveptr = NULL;
103  float input_matrix[4][49];
104 
105  for (i = 0; i < 4; i++) {
106  ctx->biases[i] = ctx->biases[i] / 255.0;
107  }
108 
109  for (i = 0; i < 4; i++) {
110  p = ctx->matrix_str[i];
111  while (ctx->matrix_sizes[i] < 49) {
112  arg = av_strtok(p, " ", &saveptr);
113  if (!arg) {
114  break;
115  }
116  p = NULL;
117  sscanf_err = sscanf(arg, "%f", &input_matrix[i][ctx->matrix_sizes[i]]);
118  if (sscanf_err != 1) {
119  av_log(ctx, AV_LOG_ERROR, "Matrix is sequence of 9, 25 or 49 signed numbers\n");
120  return AVERROR(EINVAL);
121  }
122  ctx->matrix_sizes[i]++;
123  }
124  if (ctx->matrix_sizes[i] == 9) {
125  ctx->dims[i] = 3;
126  } else if (ctx->matrix_sizes[i] == 25) {
127  ctx->dims[i] = 5;
128  } else if (ctx->matrix_sizes[i] == 49) {
129  ctx->dims[i] = 7;
130  } else {
131  av_log(ctx, AV_LOG_ERROR, "Invalid matrix size:%d\n", ctx->matrix_sizes[i]);
132  return AVERROR(EINVAL);
133  }
134 
135  }
136 
137  for (j = 0; j < 4; j++) {
138  matrix_bytes = sizeof(float)*ctx->matrix_sizes[j];
139  matrix = av_malloc(matrix_bytes);
140  if (!matrix) {
141  av_freep(&matrix);
142  return AVERROR(ENOMEM);
143  }
144 
145  for (i = 0; i < ctx->matrix_sizes[j]; i++)
146  matrix[i] = input_matrix[j][i];
147 
148  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
149  CL_MEM_READ_ONLY |
150  CL_MEM_COPY_HOST_PTR |
151  CL_MEM_HOST_NO_ACCESS,
152  matrix_bytes, matrix, &cle);
153  if (!buffer) {
154  av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
155  "%d.\n", cle);
156  av_freep(&matrix);
157  return AVERROR(EIO);
158  }
159  ctx->matrix[j] = buffer;
160  av_freep(&matrix);
161  }
162 
163  return 0;
164 }
165 
167 {
168  AVFilterContext *avctx = inlink->dst;
169  AVFilterLink *outlink = avctx->outputs[0];
171  AVFrame *output = NULL;
172  cl_int cle;
173  size_t global_work[2];
174  cl_mem src, dst;
175  int err, p;
176 
177  av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
178  av_get_pix_fmt_name(input->format),
179  input->width, input->height, input->pts);
180 
181  if (!input->hw_frames_ctx)
182  return AVERROR(EINVAL);
183 
184  if (!ctx->initialised) {
185  err = convolution_opencl_init(avctx);
186  if (err < 0)
187  goto fail;
188 
190  if (err < 0)
191  goto fail;
192  }
193 
194  output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
195  if (!output) {
196  err = AVERROR(ENOMEM);
197  goto fail;
198  }
199 
200  for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
201  src = (cl_mem) input->data[p];
202  dst = (cl_mem)output->data[p];
203 
204  if (!dst)
205  break;
206 
207  cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
208  if (cle != CL_SUCCESS) {
209  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
210  "destination image argument: %d.\n", cle);
211  goto fail;
212  }
213  cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &src);
214  if (cle != CL_SUCCESS) {
215  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
216  "source image argument: %d.\n", cle);
217  goto fail;
218  }
219  cle = clSetKernelArg(ctx->kernel, 2, sizeof(cl_int), &ctx->dims[p]);
220  if (cle != CL_SUCCESS) {
221  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
222  "matrix size argument: %d.\n", cle);
223  goto fail;
224  }
225  cle = clSetKernelArg(ctx->kernel, 3, sizeof(cl_mem), &ctx->matrix[p]);
226  if (cle != CL_SUCCESS) {
227  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
228  "matrix argument: %d.\n", cle);
229  goto fail;
230  }
231  cle = clSetKernelArg(ctx->kernel, 4, sizeof(cl_float), &ctx->rdivs[p]);
232  if (cle != CL_SUCCESS) {
233  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
234  "rdiv argument: %d.\n", cle);
235  goto fail;
236  }
237  cle = clSetKernelArg(ctx->kernel, 5, sizeof(cl_float), &ctx->biases[p]);
238  if (cle != CL_SUCCESS) {
239  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
240  "bias argument: %d.\n", cle);
241  goto fail;
242  }
243 
244 
245  err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
246  if (err < 0)
247  goto fail;
248 
249  av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
250  "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
251  p, global_work[0], global_work[1]);
252 
253  cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
254  global_work, NULL,
255  0, NULL, NULL);
256  if (cle != CL_SUCCESS) {
257  av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
258  cle);
259  err = AVERROR(EIO);
260  goto fail;
261  }
262  }
263 
264  cle = clFinish(ctx->command_queue);
265  if (cle != CL_SUCCESS) {
266  av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
267  cle);
268  err = AVERROR(EIO);
269  goto fail;
270  }
271 
272  err = av_frame_copy_props(output, input);
273  if (err < 0)
274  goto fail;
275 
276  av_frame_free(&input);
277 
278  av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
279  av_get_pix_fmt_name(output->format),
280  output->width, output->height, output->pts);
281 
282  return ff_filter_frame(outlink, output);
283 
284 fail:
285  clFinish(ctx->command_queue);
286  av_frame_free(&input);
287  av_frame_free(&output);
288  return err;
289 }
290 
292 {
294  cl_int cle;
295  int i;
296 
297  for (i = 0; i < 4; i++) {
298  clReleaseMemObject(ctx->matrix[i]);
299  }
300 
301  if (ctx->kernel) {
302  cle = clReleaseKernel(ctx->kernel);
303  if (cle != CL_SUCCESS)
304  av_log(avctx, AV_LOG_ERROR, "Failed to release "
305  "kernel: %d.\n", cle);
306  }
307 
308  if (ctx->command_queue) {
309  cle = clReleaseCommandQueue(ctx->command_queue);
310  if (cle != CL_SUCCESS)
311  av_log(avctx, AV_LOG_ERROR, "Failed to release "
312  "command queue: %d.\n", cle);
313  }
314 
316 }
317 
318 #define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
319 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
321  { "0m", "set matrix for 2nd plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
322  { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
323  { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
324  { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
325  { "0rdiv", "set rdiv for 1nd plane", OFFSET(rdivs[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
326  { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdivs[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
327  { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdivs[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
328  { "3rdiv", "set rdiv for 4th plane", OFFSET(rdivs[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
329  { "0bias", "set bias for 1st plane", OFFSET(biases[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
330  { "1bias", "set bias for 2nd plane", OFFSET(biases[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
331  { "2bias", "set bias for 3rd plane", OFFSET(biases[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
332  { "3bias", "set bias for 4th plane", OFFSET(biases[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
333  { NULL }
334 };
335 
336 AVFILTER_DEFINE_CLASS(convolution_opencl);
337 
339  {
340  .name = "default",
341  .type = AVMEDIA_TYPE_VIDEO,
342  .filter_frame = &convolution_opencl_filter_frame,
343  .config_props = &ff_opencl_filter_config_input,
344  },
345  { NULL }
346 };
347 
349  {
350  .name = "default",
351  .type = AVMEDIA_TYPE_VIDEO,
352  .config_props = &ff_opencl_filter_config_output,
353  },
354  { NULL }
355 };
356 
358  .name = "convolution_opencl",
359  .description = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
360  .priv_size = sizeof(ConvolutionOpenCLContext),
361  .priv_class = &convolution_opencl_class,
365  .inputs = convolution_opencl_inputs,
366  .outputs = convolution_opencl_outputs,
367  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
368 };
#define NULL
Definition: coverity.c:32
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:385
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:281
#define FLAGS
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it...
Definition: opencl.c:63
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:31
misc image utilities
Main libavfilter public API header.
Memory handling functions.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
#define src
Definition: vp8dsp.c:254
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:150
static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:40
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame...
Definition: frame.h:556
const char * name
Pad name.
Definition: internal.h:60
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
static char buffer[20]
Definition: seek.c:32
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
cl_device_id device_id
The primary device ID of the device.
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:99
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
AVFilter ff_vf_convolution_opencl
int width
Definition: frame.h:276
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
const char * arg
Definition: jacosubdec.c:66
#define OFFSET(x)
#define fail()
Definition: checkasm.h:116
AVFILTER_DEFINE_CLASS(convolution_opencl)
static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
AVFormatContext * ctx
Definition: movenc.c:48
static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
static const AVFilterPad convolution_opencl_outputs[]
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define FF_ARRAY_ELEMS(a)
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:291
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define SIZE_SPECIFIER
Definition: internal.h:262
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok()...
Definition: avstring.c:184
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
common internal and external API header
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:159
static int convolution_opencl_init(AVFilterContext *avctx)
static const AVFilterPad convolution_opencl_inputs[]
cl_context context
The OpenCL context which will contain all operations and frames on this device.
static const AVOption convolution_opencl_options[]
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:276
#define av_freep(p)
cl_program program
Definition: opencl.h:42
const char * ff_opencl_source_convolution
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:174
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2279
internal API functions
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:652