FFmpeg  4.4.5
vf_scale_cuda.c
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include <float.h>
24 #include <stdio.h>
25 #include <string.h>
26 
27 #include "libavutil/avstring.h"
28 #include "libavutil/common.h"
29 #include "libavutil/hwcontext.h"
31 #include "libavutil/cuda_check.h"
32 #include "libavutil/internal.h"
33 #include "libavutil/opt.h"
34 #include "libavutil/pixdesc.h"
35 
36 #include "avfilter.h"
37 #include "formats.h"
38 #include "internal.h"
39 #include "scale_eval.h"
40 #include "video.h"
41 
42 #include "vf_scale_cuda.h"
43 
44 static const enum AVPixelFormat supported_formats[] = {
53 };
54 
55 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
56 #define BLOCKX 32
57 #define BLOCKY 16
58 
59 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
60 
61 enum {
63 
68 
70 };
71 
72 typedef struct CUDAScaleContext {
73  const AVClass *class;
74 
76 
77  enum AVPixelFormat in_fmt;
79 
82 
85 
86  /**
87  * Output sw format. AV_PIX_FMT_NONE for no conversion.
88  */
89  enum AVPixelFormat format;
90 
91  char *w_expr; ///< width expression string
92  char *h_expr; ///< height expression string
93 
96 
97  CUcontext cu_ctx;
98  CUmodule cu_module;
99  CUfunction cu_func_uchar;
100  CUfunction cu_func_uchar2;
101  CUfunction cu_func_uchar4;
102  CUfunction cu_func_ushort;
103  CUfunction cu_func_ushort2;
104  CUfunction cu_func_ushort4;
105  CUstream cu_stream;
106 
107  CUdeviceptr srcBuffer;
108  CUdeviceptr dstBuffer;
110 
114 
115  float param;
117 
119 {
120  CUDAScaleContext *s = ctx->priv;
121 
122  s->format = AV_PIX_FMT_NONE;
123  s->frame = av_frame_alloc();
124  if (!s->frame)
125  return AVERROR(ENOMEM);
126 
127  s->tmp_frame = av_frame_alloc();
128  if (!s->tmp_frame)
129  return AVERROR(ENOMEM);
130 
131  return 0;
132 }
133 
135 {
136  CUDAScaleContext *s = ctx->priv;
137 
138  if (s->hwctx && s->cu_module) {
139  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
140  CUcontext dummy;
141 
142  CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
143  CHECK_CU(cu->cuModuleUnload(s->cu_module));
144  s->cu_module = NULL;
145  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
146  }
147 
148  av_frame_free(&s->frame);
149  av_buffer_unref(&s->frames_ctx);
150  av_frame_free(&s->tmp_frame);
151 }
152 
154 {
155  static const enum AVPixelFormat pixel_formats[] = {
157  };
158  AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
159  if (!pix_fmts)
160  return AVERROR(ENOMEM);
161 
163 }
164 
165 static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
166 {
167  AVBufferRef *out_ref = NULL;
168  AVHWFramesContext *out_ctx;
169  int ret;
170 
171  out_ref = av_hwframe_ctx_alloc(device_ctx);
172  if (!out_ref)
173  return AVERROR(ENOMEM);
174  out_ctx = (AVHWFramesContext*)out_ref->data;
175 
176  out_ctx->format = AV_PIX_FMT_CUDA;
177  out_ctx->sw_format = s->out_fmt;
178  out_ctx->width = FFALIGN(width, 32);
179  out_ctx->height = FFALIGN(height, 32);
180 
181  ret = av_hwframe_ctx_init(out_ref);
182  if (ret < 0)
183  goto fail;
184 
185  av_frame_unref(s->frame);
186  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
187  if (ret < 0)
188  goto fail;
189 
190  s->frame->width = width;
191  s->frame->height = height;
192 
193  av_buffer_unref(&s->frames_ctx);
194  s->frames_ctx = out_ref;
195 
196  return 0;
197 fail:
198  av_buffer_unref(&out_ref);
199  return ret;
200 }
201 
202 static int format_is_supported(enum AVPixelFormat fmt)
203 {
204  int i;
205 
206  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
207  if (supported_formats[i] == fmt)
208  return 1;
209  return 0;
210 }
211 
212 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
213  int out_width, int out_height)
214 {
215  CUDAScaleContext *s = ctx->priv;
216 
217  AVHWFramesContext *in_frames_ctx;
218 
219  enum AVPixelFormat in_format;
220  enum AVPixelFormat out_format;
221  int ret;
222 
223  /* check that we have a hw context */
224  if (!ctx->inputs[0]->hw_frames_ctx) {
225  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
226  return AVERROR(EINVAL);
227  }
228  in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
229  in_format = in_frames_ctx->sw_format;
230  out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
231 
232  if (!format_is_supported(in_format)) {
233  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
234  av_get_pix_fmt_name(in_format));
235  return AVERROR(ENOSYS);
236  }
237  if (!format_is_supported(out_format)) {
238  av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
239  av_get_pix_fmt_name(out_format));
240  return AVERROR(ENOSYS);
241  }
242 
243  s->in_fmt = in_format;
244  s->out_fmt = out_format;
245 
246  if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
247  s->frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
248  if (!s->frames_ctx)
249  return AVERROR(ENOMEM);
250  } else {
251  s->passthrough = 0;
252 
253  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
254  if (ret < 0)
255  return ret;
256  }
257 
258  ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
259  if (!ctx->outputs[0]->hw_frames_ctx)
260  return AVERROR(ENOMEM);
261 
262  return 0;
263 }
264 
266 {
267  AVFilterContext *ctx = outlink->src;
268  AVFilterLink *inlink = outlink->src->inputs[0];
269  CUDAScaleContext *s = ctx->priv;
270  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
271  AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
272  CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
273  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
274  char buf[64];
275  int w, h;
276  int ret;
277 
278  char *scaler_ptx;
279  const char *function_infix = "";
280 
281  extern char vf_scale_cuda_ptx[];
282  extern char vf_scale_cuda_bicubic_ptx[];
283 
284  switch(s->interp_algo) {
285  case INTERP_ALGO_NEAREST:
286  scaler_ptx = vf_scale_cuda_ptx;
287  function_infix = "_Nearest";
288  s->interp_use_linear = 0;
289  s->interp_as_integer = 1;
290  break;
292  scaler_ptx = vf_scale_cuda_ptx;
293  function_infix = "_Bilinear";
294  s->interp_use_linear = 1;
295  s->interp_as_integer = 1;
296  break;
297  case INTERP_ALGO_DEFAULT:
298  case INTERP_ALGO_BICUBIC:
299  scaler_ptx = vf_scale_cuda_bicubic_ptx;
300  function_infix = "_Bicubic";
301  s->interp_use_linear = 0;
302  s->interp_as_integer = 0;
303  break;
304  case INTERP_ALGO_LANCZOS:
305  scaler_ptx = vf_scale_cuda_bicubic_ptx;
306  function_infix = "_Lanczos";
307  s->interp_use_linear = 0;
308  s->interp_as_integer = 0;
309  break;
310  default:
311  av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n");
312  return AVERROR_BUG;
313  }
314 
315  s->hwctx = device_hwctx;
316  s->cu_stream = s->hwctx->stream;
317 
318  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
319  if (ret < 0)
320  goto fail;
321 
322  ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx));
323  if (ret < 0)
324  goto fail;
325 
326  snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix);
327  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf));
328  if (ret < 0)
329  goto fail;
330 
331  snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix);
332  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf));
333  if (ret < 0)
334  goto fail;
335 
336  snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix);
337  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf));
338  if (ret < 0)
339  goto fail;
340 
341  snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix);
342  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf));
343  if (ret < 0)
344  goto fail;
345 
346  snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix);
347  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf));
348  if (ret < 0)
349  goto fail;
350 
351  snprintf(buf, sizeof(buf), "Subsample%s_ushort4", function_infix);
352  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf));
353  if (ret < 0)
354  goto fail;
355 
356 
357  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
358 
359  if ((ret = ff_scale_eval_dimensions(s,
360  s->w_expr, s->h_expr,
361  inlink, outlink,
362  &w, &h)) < 0)
363  goto fail;
364 
365  ff_scale_adjust_dimensions(inlink, &w, &h,
366  s->force_original_aspect_ratio, s->force_divisible_by);
367 
368  if (((int64_t)h * inlink->w) > INT_MAX ||
369  ((int64_t)w * inlink->h) > INT_MAX)
370  av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
371 
372  outlink->w = w;
373  outlink->h = h;
374 
375  ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
376  if (ret < 0)
377  return ret;
378 
379  av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n",
380  inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : "");
381 
382  if (inlink->sample_aspect_ratio.num) {
383  outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
384  outlink->w*inlink->h},
385  inlink->sample_aspect_ratio);
386  } else {
387  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
388  }
389 
390  return 0;
391 
392 fail:
393  return ret;
394 }
395 
396 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
397  uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
398  uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
399  int pixel_size, int bit_depth)
400 {
401  CUDAScaleContext *s = ctx->priv;
402  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
403  CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
404  CUtexObject tex = 0;
405  void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
406  &src_width, &src_height, &bit_depth, &s->param };
407  int ret;
408 
409  CUDA_TEXTURE_DESC tex_desc = {
410  .filterMode = s->interp_use_linear ?
411  CU_TR_FILTER_MODE_LINEAR :
412  CU_TR_FILTER_MODE_POINT,
413  .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
414  };
415 
416  CUDA_RESOURCE_DESC res_desc = {
417  .resType = CU_RESOURCE_TYPE_PITCH2D,
418  .res.pitch2D.format = pixel_size == 1 ?
419  CU_AD_FORMAT_UNSIGNED_INT8 :
420  CU_AD_FORMAT_UNSIGNED_INT16,
421  .res.pitch2D.numChannels = channels,
422  .res.pitch2D.width = src_width,
423  .res.pitch2D.height = src_height,
424  .res.pitch2D.pitchInBytes = src_pitch,
425  .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
426  };
427 
428  // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch
429  // Same for pixel_size, which is represented via datatypes on the cuda side of things.
430  dst_pitch /= channels * pixel_size;
431 
432  ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
433  if (ret < 0)
434  goto exit;
435 
436  ret = CHECK_CU(cu->cuLaunchKernel(func,
437  DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
438  BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
439 
440 exit:
441  if (tex)
442  CHECK_CU(cu->cuTexObjectDestroy(tex));
443 
444  return ret;
445 }
446 
448  AVFrame *out, AVFrame *in)
449 {
450  AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
451  CUDAScaleContext *s = ctx->priv;
452 
453  switch (in_frames_ctx->sw_format) {
454  case AV_PIX_FMT_YUV420P:
455  call_resize_kernel(ctx, s->cu_func_uchar, 1,
456  in->data[0], in->width, in->height, in->linesize[0],
457  out->data[0], out->width, out->height, out->linesize[0],
458  1, 8);
459  call_resize_kernel(ctx, s->cu_func_uchar, 1,
460  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
461  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
462  1, 8);
463  call_resize_kernel(ctx, s->cu_func_uchar, 1,
464  in->data[2], in->width / 2, in->height / 2, in->linesize[2],
465  out->data[2], out->width / 2, out->height / 2, out->linesize[2],
466  1, 8);
467  break;
468  case AV_PIX_FMT_YUV444P:
469  call_resize_kernel(ctx, s->cu_func_uchar, 1,
470  in->data[0], in->width, in->height, in->linesize[0],
471  out->data[0], out->width, out->height, out->linesize[0],
472  1, 8);
473  call_resize_kernel(ctx, s->cu_func_uchar, 1,
474  in->data[1], in->width, in->height, in->linesize[1],
475  out->data[1], out->width, out->height, out->linesize[1],
476  1, 8);
477  call_resize_kernel(ctx, s->cu_func_uchar, 1,
478  in->data[2], in->width, in->height, in->linesize[2],
479  out->data[2], out->width, out->height, out->linesize[2],
480  1, 8);
481  break;
483  call_resize_kernel(ctx, s->cu_func_ushort, 1,
484  in->data[0], in->width, in->height, in->linesize[0],
485  out->data[0], out->width, out->height, out->linesize[0],
486  2, 16);
487  call_resize_kernel(ctx, s->cu_func_ushort, 1,
488  in->data[1], in->width, in->height, in->linesize[1],
489  out->data[1], out->width, out->height, out->linesize[1],
490  2, 16);
491  call_resize_kernel(ctx, s->cu_func_ushort, 1,
492  in->data[2], in->width, in->height, in->linesize[2],
493  out->data[2], out->width, out->height, out->linesize[2],
494  2, 16);
495  break;
496  case AV_PIX_FMT_NV12:
497  call_resize_kernel(ctx, s->cu_func_uchar, 1,
498  in->data[0], in->width, in->height, in->linesize[0],
499  out->data[0], out->width, out->height, out->linesize[0],
500  1, 8);
501  call_resize_kernel(ctx, s->cu_func_uchar2, 2,
502  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
503  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
504  1, 8);
505  break;
506  case AV_PIX_FMT_P010LE:
507  call_resize_kernel(ctx, s->cu_func_ushort, 1,
508  in->data[0], in->width, in->height, in->linesize[0],
509  out->data[0], out->width, out->height, out->linesize[0],
510  2, 10);
511  call_resize_kernel(ctx, s->cu_func_ushort2, 2,
512  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
513  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
514  2, 10);
515  break;
516  case AV_PIX_FMT_P016LE:
517  call_resize_kernel(ctx, s->cu_func_ushort, 1,
518  in->data[0], in->width, in->height, in->linesize[0],
519  out->data[0], out->width, out->height, out->linesize[0],
520  2, 16);
521  call_resize_kernel(ctx, s->cu_func_ushort2, 2,
522  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
523  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
524  2, 16);
525  break;
526  case AV_PIX_FMT_0RGB32:
527  case AV_PIX_FMT_0BGR32:
528  call_resize_kernel(ctx, s->cu_func_uchar4, 4,
529  in->data[0], in->width, in->height, in->linesize[0],
530  out->data[0], out->width, out->height, out->linesize[0],
531  1, 8);
532  break;
533  default:
534  return AVERROR_BUG;
535  }
536 
537  return 0;
538 }
539 
541 {
542  CUDAScaleContext *s = ctx->priv;
543  AVFilterLink *outlink = ctx->outputs[0];
544  AVFrame *src = in;
545  int ret;
546 
547  ret = scalecuda_resize(ctx, s->frame, src);
548  if (ret < 0)
549  return ret;
550 
551  src = s->frame;
552  ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
553  if (ret < 0)
554  return ret;
555 
556  av_frame_move_ref(out, s->frame);
557  av_frame_move_ref(s->frame, s->tmp_frame);
558 
559  s->frame->width = outlink->w;
560  s->frame->height = outlink->h;
561 
562  ret = av_frame_copy_props(out, in);
563  if (ret < 0)
564  return ret;
565 
566  return 0;
567 }
568 
570 {
571  AVFilterContext *ctx = link->dst;
572  CUDAScaleContext *s = ctx->priv;
573  AVFilterLink *outlink = ctx->outputs[0];
574  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
575 
576  AVFrame *out = NULL;
577  CUcontext dummy;
578  int ret = 0;
579 
580  if (s->passthrough)
581  return ff_filter_frame(outlink, in);
582 
583  out = av_frame_alloc();
584  if (!out) {
585  ret = AVERROR(ENOMEM);
586  goto fail;
587  }
588 
589  ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
590  if (ret < 0)
591  goto fail;
592 
593  ret = cudascale_scale(ctx, out, in);
594 
595  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
596  if (ret < 0)
597  goto fail;
598 
599  av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
600  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
601  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
602  INT_MAX);
603 
604  av_frame_free(&in);
605  return ff_filter_frame(outlink, out);
606 fail:
607  av_frame_free(&in);
608  av_frame_free(&out);
609  return ret;
610 }
611 
613 {
614  CUDAScaleContext *s = inlink->dst->priv;
615 
616  return s->passthrough ?
617  ff_null_get_video_buffer (inlink, w, h) :
618  ff_default_get_video_buffer(inlink, w, h);
619 }
620 
621 #define OFFSET(x) offsetof(CUDAScaleContext, x)
622 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
623 static const AVOption options[] = {
624  { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
625  { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
626  { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
627  { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
628  { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
629  { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
630  { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
631  { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
632  { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
633  { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
634  { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
635  { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
636  { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
637  { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
638  { NULL },
639 };
640 
641 static const AVClass cudascale_class = {
642  .class_name = "cudascale",
643  .item_name = av_default_item_name,
644  .option = options,
645  .version = LIBAVUTIL_VERSION_INT,
646 };
647 
648 static const AVFilterPad cudascale_inputs[] = {
649  {
650  .name = "default",
651  .type = AVMEDIA_TYPE_VIDEO,
652  .filter_frame = cudascale_filter_frame,
653  .get_video_buffer = cudascale_get_video_buffer,
654  },
655  { NULL }
656 };
657 
658 static const AVFilterPad cudascale_outputs[] = {
659  {
660  .name = "default",
661  .type = AVMEDIA_TYPE_VIDEO,
662  .config_props = cudascale_config_props,
663  },
664  { NULL }
665 };
666 
668  .name = "scale_cuda",
669  .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),
670 
671  .init = cudascale_init,
672  .uninit = cudascale_uninit,
673  .query_formats = cudascale_query_formats,
674 
675  .priv_size = sizeof(CUDAScaleContext),
676  .priv_class = &cudascale_class,
677 
680 
681  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
682 };
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:254
channels
Definition: aptx.h:33
#define av_cold
Definition: attributes.h:88
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
uint8_t
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1096
Main libavfilter public API header.
#define s(width, name)
Definition: cbs_vp9.c:257
#define fail()
Definition: checkasm.h:133
common internal and external API header
#define NULL
Definition: coverity.c:32
long long int64_t
Definition: coverity.c:34
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
@ AV_OPT_TYPE_INT
Definition: opt.h:225
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:228
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:125
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
#define AVERROR(e)
Definition: error.h:43
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:553
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:582
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:658
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:210
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
Definition: rational.c:35
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:333
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:247
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:502
FFmpeg internal API for CUDA.
int i
Definition: input.c:407
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:339
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:309
uint8_t w
Definition: llviddspenc.c:39
#define FFALIGN(x, a)
Definition: macros.h:48
int dummy
Definition: motion.c:64
AVOptions.
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2489
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:376
#define AV_PIX_FMT_P010
Definition: pixfmt.h:448
#define AV_PIX_FMT_P016
Definition: pixfmt.h:449
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_P010LE
like NV12, with 10bpp per component, data in the high bits, zeros in the low bits,...
Definition: pixfmt.h:284
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
@ AV_PIX_FMT_P016LE
like NV12, with 16bpp per component, little-endian
Definition: pixfmt.h:300
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:235
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:412
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:377
int ff_scale_adjust_dimensions(AVFilterLink *inlink, int *ret_w, int *ret_h, int force_original_aspect_ratio, int force_divisible_by)
Transform evaluated width and height obtained from ff_scale_eval_dimensions into actual target width ...
Definition: scale_eval.c:113
int ff_scale_eval_dimensions(void *log_ctx, const char *w_expr, const char *h_expr, AVFilterLink *inlink, AVFilterLink *outlink, int *ret_w, int *ret_h)
Parse and evaluate string expressions for width and height.
Definition: scale_eval.c:57
#define FF_ARRAY_ELEMS(a)
#define snprintf
Definition: snprintf.h:34
A reference to a data buffer.
Definition: buffer.h:84
uint8_t * data
The data buffer.
Definition: buffer.h:92
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
Definition: log.h:67
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
An instance of a filter.
Definition: avfilter.h:341
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:349
void * priv
private data for use by the filter
Definition: avfilter.h:356
A list of supported formats for one end of a filter link.
Definition: formats.h:65
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:92
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:209
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:141
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:229
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:149
AVOption.
Definition: opt.h:248
Rational number (pair of numerator and denominator).
Definition: rational.h:58
int num
Numerator.
Definition: rational.h:59
CUfunction cu_func_uchar2
CUdeviceptr srcBuffer
CUfunction cu_func_uchar4
enum AVPixelFormat in_fmt
Definition: vf_scale_cuda.c:77
enum AVPixelFormat format
Output sw format.
Definition: vf_scale_cuda.c:89
int force_original_aspect_ratio
Definition: vf_scale_cuda.c:94
CUfunction cu_func_ushort2
AVFrame * tmp_frame
Definition: vf_scale_cuda.c:83
CUdeviceptr dstBuffer
AVCUDADeviceContext * hwctx
Definition: vf_scale_cuda.c:75
CUfunction cu_func_uchar
Definition: vf_scale_cuda.c:99
char * w_expr
width expression string
Definition: vf_scale_cuda.c:91
CUmodule cu_module
Definition: vf_scale_cuda.c:98
CUfunction cu_func_ushort
CUcontext cu_ctx
Definition: vf_scale_cuda.c:97
CUfunction cu_func_ushort4
char * h_expr
height expression string
Definition: vf_scale_cuda.c:92
enum AVPixelFormat out_fmt
Definition: vf_scale_cuda.c:78
AVBufferRef * frames_ctx
Definition: vf_scale_cuda.c:80
#define av_log(a,...)
#define src
Definition: vp8dsp.c:255
FILE * out
Definition: movenc.c:54
AVFormatContext * ctx
Definition: movenc.c:48
#define height
#define width
static enum AVPixelFormat supported_formats[]
Definition: vf_scale_cuda.c:44
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
static const AVOption options[]
static av_cold void cudascale_uninit(AVFilterContext *ctx)
static int cudascale_query_formats(AVFilterContext *ctx)
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
#define DIV_UP(a, b)
Definition: vf_scale_cuda.c:55
static av_cold int cudascale_config_props(AVFilterLink *outlink)
static AVFrame * cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, int out_width, int out_height)
#define FLAGS
static av_cold int cudascale_init(AVFilterContext *ctx)
static const AVClass cudascale_class
@ INTERP_ALGO_LANCZOS
Definition: vf_scale_cuda.c:67
@ INTERP_ALGO_BICUBIC
Definition: vf_scale_cuda.c:66
@ INTERP_ALGO_DEFAULT
Definition: vf_scale_cuda.c:62
@ INTERP_ALGO_BILINEAR
Definition: vf_scale_cuda.c:65
@ INTERP_ALGO_COUNT
Definition: vf_scale_cuda.c:69
@ INTERP_ALGO_NEAREST
Definition: vf_scale_cuda.c:64
#define BLOCKX
Definition: vf_scale_cuda.c:56
static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
static int format_is_supported(enum AVPixelFormat fmt)
#define CHECK_CU(x)
Definition: vf_scale_cuda.c:59
#define BLOCKY
Definition: vf_scale_cuda.c:57
#define OFFSET(x)
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
AVFilter ff_vf_scale_cuda
static const AVFilterPad cudascale_outputs[]
static const AVFilterPad cudascale_inputs[]
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size, int bit_depth)
#define SCALE_CUDA_PARAM_DEFAULT
Definition: vf_scale_cuda.h:26
const char vf_scale_cuda_ptx[]
const char vf_scale_cuda_bicubic_ptx[]
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:39
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:99