FFmpeg  4.0
vf_unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/common.h"
20 #include "libavutil/imgutils.h"
21 #include "libavutil/mem.h"
22 #include "libavutil/opt.h"
23 #include "libavutil/pixdesc.h"
24 
25 #include "avfilter.h"
26 #include "internal.h"
27 #include "opencl.h"
28 #include "opencl_source.h"
29 #include "video.h"
30 
31 #define MAX_DIAMETER 23
32 
33 typedef struct UnsharpOpenCLContext {
35 
37  cl_kernel kernel;
38  cl_command_queue command_queue;
39 
40  float luma_size_x;
41  float luma_size_y;
42  float luma_amount;
46 
47  int global;
48 
49  int nb_planes;
50  struct {
53 
54  cl_mem matrix;
55  cl_mem coef_x;
56  cl_mem coef_y;
57 
58  cl_int size_x;
59  cl_int size_y;
60  cl_float amount;
61  cl_float threshold;
62  } plane[4];
64 
65 
67 {
68  UnsharpOpenCLContext *ctx = avctx->priv;
69  cl_int cle;
70  int err;
71 
73  if (err < 0)
74  goto fail;
75 
76  ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
77  ctx->ocf.hwctx->device_id,
78  0, &cle);
79  if (!ctx->command_queue) {
80  av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
81  "command queue: %d.\n", cle);
82  err = AVERROR(EIO);
83  goto fail;
84  }
85 
86  // Use global kernel if mask size will be too big for the local store..
87  ctx->global = (ctx->luma_size_x > 17.0f ||
88  ctx->luma_size_y > 17.0f ||
89  ctx->chroma_size_x > 17.0f ||
90  ctx->chroma_size_y > 17.0f);
91 
92  ctx->kernel = clCreateKernel(ctx->ocf.program,
93  ctx->global ? "unsharp_global"
94  : "unsharp_local", &cle);
95  if (!ctx->kernel) {
96  av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
97  err = AVERROR(EIO);
98  goto fail;
99  }
100 
101  ctx->initialised = 1;
102  return 0;
103 
104 fail:
105  if (ctx->command_queue)
106  clReleaseCommandQueue(ctx->command_queue);
107  if (ctx->kernel)
108  clReleaseKernel(ctx->kernel);
109  return err;
110 }
111 
113 {
114  UnsharpOpenCLContext *ctx = avctx->priv;
115  const AVPixFmtDescriptor *desc;
116  float *matrix;
117  double val, sum;
118  cl_int cle;
119  cl_mem buffer;
120  size_t matrix_bytes;
121  float diam_x, diam_y, amount;
122  int err, p, x, y, size_x, size_y;
123 
125 
126  ctx->nb_planes = 0;
127  for (p = 0; p < desc->nb_components; p++)
128  ctx->nb_planes = FFMAX(ctx->nb_planes, desc->comp[p].plane + 1);
129 
130  for (p = 0; p < ctx->nb_planes; p++) {
131  if (p == 0 || (desc->flags & AV_PIX_FMT_FLAG_RGB)) {
132  diam_x = ctx->luma_size_x;
133  diam_y = ctx->luma_size_y;
134  amount = ctx->luma_amount;
135  } else {
136  diam_x = ctx->chroma_size_x;
137  diam_y = ctx->chroma_size_y;
138  amount = ctx->chroma_amount;
139  }
140  size_x = (int)ceil(diam_x) | 1;
141  size_y = (int)ceil(diam_y) | 1;
142  matrix_bytes = size_x * size_y * sizeof(float);
143 
144  matrix = av_malloc(matrix_bytes);
145  if (!matrix) {
146  err = AVERROR(ENOMEM);
147  goto fail;
148  }
149 
150  sum = 0.0;
151  for (x = 0; x < size_x; x++) {
152  double dx = (double)(x - size_x / 2) / diam_x;
153  sum += ctx->plane[p].blur_x[x] = exp(-16.0 * (dx * dx));
154  }
155  for (x = 0; x < size_x; x++)
156  ctx->plane[p].blur_x[x] /= sum;
157 
158  sum = 0.0;
159  for (y = 0; y < size_y; y++) {
160  double dy = (double)(y - size_y / 2) / diam_y;
161  sum += ctx->plane[p].blur_y[y] = exp(-16.0 * (dy * dy));
162  }
163  for (y = 0; y < size_y; y++)
164  ctx->plane[p].blur_y[y] /= sum;
165 
166  for (y = 0; y < size_y; y++) {
167  for (x = 0; x < size_x; x++) {
168  val = ctx->plane[p].blur_x[x] * ctx->plane[p].blur_y[y];
169  matrix[y * size_x + x] = val;
170  }
171  }
172 
173  if (ctx->global) {
174  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
175  CL_MEM_READ_ONLY |
176  CL_MEM_COPY_HOST_PTR |
177  CL_MEM_HOST_NO_ACCESS,
178  matrix_bytes, matrix, &cle);
179  if (!buffer) {
180  av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
181  "%d.\n", cle);
182  err = AVERROR(EIO);
183  goto fail;
184  }
185  ctx->plane[p].matrix = buffer;
186  } else {
187  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
188  CL_MEM_READ_ONLY |
189  CL_MEM_COPY_HOST_PTR |
190  CL_MEM_HOST_NO_ACCESS,
191  sizeof(ctx->plane[p].blur_x),
192  ctx->plane[p].blur_x, &cle);
193  if (!buffer) {
194  av_log(avctx, AV_LOG_ERROR, "Failed to create x-coef buffer: "
195  "%d.\n", cle);
196  err = AVERROR(EIO);
197  goto fail;
198  }
199  ctx->plane[p].coef_x = buffer;
200 
201  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
202  CL_MEM_READ_ONLY |
203  CL_MEM_COPY_HOST_PTR |
204  CL_MEM_HOST_NO_ACCESS,
205  sizeof(ctx->plane[p].blur_y),
206  ctx->plane[p].blur_y, &cle);
207  if (!buffer) {
208  av_log(avctx, AV_LOG_ERROR, "Failed to create y-coef buffer: "
209  "%d.\n", cle);
210  err = AVERROR(EIO);
211  goto fail;
212  }
213  ctx->plane[p].coef_y = buffer;
214  }
215 
216  av_freep(&matrix);
217 
218  ctx->plane[p].size_x = size_x;
219  ctx->plane[p].size_y = size_y;
220  ctx->plane[p].amount = amount;
221  }
222 
223  err = 0;
224 fail:
225  av_freep(&matrix);
226  return err;
227 }
228 
230 {
231  AVFilterContext *avctx = inlink->dst;
232  AVFilterLink *outlink = avctx->outputs[0];
233  UnsharpOpenCLContext *ctx = avctx->priv;
234  AVFrame *output = NULL;
235  cl_int cle;
236  size_t global_work[2];
237  size_t local_work[2];
238  cl_mem src, dst;
239  int err, p;
240 
241  av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
242  av_get_pix_fmt_name(input->format),
243  input->width, input->height, input->pts);
244 
245  if (!input->hw_frames_ctx)
246  return AVERROR(EINVAL);
247 
248  if (!ctx->initialised) {
249  err = unsharp_opencl_init(avctx);
250  if (err < 0)
251  goto fail;
252 
254  if (err < 0)
255  goto fail;
256  }
257 
258  output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
259  if (!output) {
260  err = AVERROR(ENOMEM);
261  goto fail;
262  }
263 
264  for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
265  src = (cl_mem) input->data[p];
266  dst = (cl_mem)output->data[p];
267 
268  if (!dst)
269  break;
270 
271  cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
272  if (cle != CL_SUCCESS) {
273  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
274  "destination image argument: %d.\n", cle);
275  goto fail;
276  }
277  cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &src);
278  if (cle != CL_SUCCESS) {
279  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
280  "source image argument: %d.\n", cle);
281  goto fail;
282  }
283  cle = clSetKernelArg(ctx->kernel, 2, sizeof(cl_int), &ctx->plane[p].size_x);
284  if (cle != CL_SUCCESS) {
285  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
286  "matrix size argument: %d.\n", cle);
287  goto fail;
288  }
289  cle = clSetKernelArg(ctx->kernel, 3, sizeof(cl_int), &ctx->plane[p].size_y);
290  if (cle != CL_SUCCESS) {
291  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
292  "matrix size argument: %d.\n", cle);
293  goto fail;
294  }
295  cle = clSetKernelArg(ctx->kernel, 4, sizeof(cl_float), &ctx->plane[p].amount);
296  if (cle != CL_SUCCESS) {
297  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
298  "amount argument: %d.\n", cle);
299  goto fail;
300  }
301  if (ctx->global) {
302  cle = clSetKernelArg(ctx->kernel, 5, sizeof(cl_mem), &ctx->plane[p].matrix);
303  if (cle != CL_SUCCESS) {
304  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
305  "matrix argument: %d.\n", cle);
306  goto fail;
307  }
308  } else {
309  cle = clSetKernelArg(ctx->kernel, 5, sizeof(cl_mem), &ctx->plane[p].coef_x);
310  if (cle != CL_SUCCESS) {
311  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
312  "x-coef argument: %d.\n", cle);
313  goto fail;
314  }
315  cle = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->plane[p].coef_y);
316  if (cle != CL_SUCCESS) {
317  av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
318  "y-coef argument: %d.\n", cle);
319  goto fail;
320  }
321  }
322 
323  err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
324  ctx->global ? 0 : 16);
325  if (err < 0)
326  goto fail;
327 
328  local_work[0] = 16;
329  local_work[1] = 16;
330 
331  av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
332  "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
333  p, global_work[0], global_work[1]);
334 
335  cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
336  global_work, ctx->global ? NULL : local_work,
337  0, NULL, NULL);
338  if (cle != CL_SUCCESS) {
339  av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
340  cle);
341  err = AVERROR(EIO);
342  goto fail;
343  }
344  }
345 
346  cle = clFinish(ctx->command_queue);
347  if (cle != CL_SUCCESS) {
348  av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
349  cle);
350  err = AVERROR(EIO);
351  goto fail;
352  }
353 
354  err = av_frame_copy_props(output, input);
355  if (err < 0)
356  goto fail;
357 
358  av_frame_free(&input);
359 
360  av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
361  av_get_pix_fmt_name(output->format),
362  output->width, output->height, output->pts);
363 
364  return ff_filter_frame(outlink, output);
365 
366 fail:
367  clFinish(ctx->command_queue);
368  av_frame_free(&input);
369  av_frame_free(&output);
370  return err;
371 }
372 
374 {
375  UnsharpOpenCLContext *ctx = avctx->priv;
376  cl_int cle;
377  int i;
378 
379  for (i = 0; i < ctx->nb_planes; i++) {
380  if (ctx->plane[i].matrix)
381  clReleaseMemObject(ctx->plane[i].matrix);
382  if (ctx->plane[i].coef_x)
383  clReleaseMemObject(ctx->plane[i].coef_x);
384  if (ctx->plane[i].coef_y)
385  clReleaseMemObject(ctx->plane[i].coef_y);
386  }
387 
388  if (ctx->kernel) {
389  cle = clReleaseKernel(ctx->kernel);
390  if (cle != CL_SUCCESS)
391  av_log(avctx, AV_LOG_ERROR, "Failed to release "
392  "kernel: %d.\n", cle);
393  }
394 
395  if (ctx->command_queue) {
396  cle = clReleaseCommandQueue(ctx->command_queue);
397  if (cle != CL_SUCCESS)
398  av_log(avctx, AV_LOG_ERROR, "Failed to release "
399  "command queue: %d.\n", cle);
400  }
401 
403 }
404 
405 #define OFFSET(x) offsetof(UnsharpOpenCLContext, x)
406 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
408  { "luma_msize_x", "Set luma mask horizontal diameter (pixels)",
410  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
411  { "lx", "Set luma mask horizontal diameter (pixels)",
413  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
414  { "luma_msize_y", "Set luma mask vertical diameter (pixels)",
416  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
417  { "ly", "Set luma mask vertical diameter (pixels)",
419  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
420  { "luma_amount", "Set luma amount (multiplier)",
422  { .dbl = 1.0 }, -10, 10, FLAGS },
423  { "la", "Set luma amount (multiplier)",
425  { .dbl = 1.0 }, -10, 10, FLAGS },
426 
427  { "chroma_msize_x", "Set chroma mask horizontal diameter (pixels after subsampling)",
429  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
430  { "cx", "Set chroma mask horizontal diameter (pixels after subsampling)",
432  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
433  { "chroma_msize_y", "Set chroma mask vertical diameter (pixels after subsampling)",
435  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
436  { "cy", "Set chroma mask vertical diameter (pixels after subsampling)",
438  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
439  { "chroma_amount", "Set chroma amount (multiplier)",
441  { .dbl = 0.0 }, -10, 10, FLAGS },
442  { "ca", "Set chroma amount (multiplier)",
444  { .dbl = 0.0 }, -10, 10, FLAGS },
445 
446  { NULL }
447 };
448 
449 AVFILTER_DEFINE_CLASS(unsharp_opencl);
450 
452  {
453  .name = "default",
454  .type = AVMEDIA_TYPE_VIDEO,
455  .filter_frame = &unsharp_opencl_filter_frame,
456  .config_props = &ff_opencl_filter_config_input,
457  },
458  { NULL }
459 };
460 
462  {
463  .name = "default",
464  .type = AVMEDIA_TYPE_VIDEO,
465  .config_props = &ff_opencl_filter_config_output,
466  },
467  { NULL }
468 };
469 
471  .name = "unsharp_opencl",
472  .description = NULL_IF_CONFIG_SMALL("Apply unsharp mask to input video"),
473  .priv_size = sizeof(UnsharpOpenCLContext),
474  .priv_class = &unsharp_opencl_class,
478  .inputs = unsharp_opencl_inputs,
479  .outputs = unsharp_opencl_outputs,
480  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
481 };
int plane
Which of the 4 planes contains the component.
Definition: pixdesc.h:35
#define NULL
Definition: coverity.c:32
const char const char void * val
Definition: avisynth_c.h:771
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:385
AVFilter ff_vf_unsharp_opencl
static av_cold void unsharp_opencl_uninit(AVFilterContext *avctx)
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2363
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:281
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it...
Definition: opencl.c:63
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:31
misc image utilities
Main libavfilter public API header.
Memory handling functions.
const char * desc
Definition: nvenc.c:65
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
static const AVFilterPad unsharp_opencl_outputs[]
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
#define src
Definition: vp8dsp.c:254
#define MAX_DIAMETER
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:150
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:40
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame...
Definition: frame.h:556
const char * name
Pad name.
Definition: internal.h:60
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
static char buffer[20]
Definition: seek.c:32
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
static const AVOption unsharp_opencl_options[]
cl_device_id device_id
The primary device ID of the device.
cl_command_queue command_queue
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
OpenCLFilterContext ocf
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:99
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
#define OFFSET(x)
int width
Definition: frame.h:276
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
static int unsharp_opencl_make_filter_params(AVFilterContext *avctx)
#define AVERROR(e)
Definition: error.h:43
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
const char * ff_opencl_source_unsharp
#define FFMAX(a, b)
Definition: common.h:94
#define fail()
Definition: checkasm.h:116
int8_t exp
Definition: eval.c:72
enum AVPixelFormat output_format
Definition: opencl.h:44
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:106
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
float blur_y[MAX_DIAMETER]
AVFormatContext * ctx
Definition: movenc.c:48
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define FF_ARRAY_ELEMS(a)
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:291
#define FLAGS
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
AVFILTER_DEFINE_CLASS(unsharp_opencl)
float blur_x[MAX_DIAMETER]
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
static int unsharp_opencl_init(AVFilterContext *avctx)
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define SIZE_SPECIFIER
Definition: internal.h:262
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
int
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
common internal and external API header
static const AVFilterPad unsharp_opencl_inputs[]
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:159
struct UnsharpOpenCLContext::@205 plane[4]
cl_context context
The OpenCL context which will contain all operations and frames on this device.
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:276
#define av_freep(p)
cl_program program
Definition: opencl.h:42
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:174
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2279
internal API functions
for(j=16;j >0;--j)
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:652