FFmpeg  4.4.5
h264pred_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
28 #include "constants.h"
29 
31 {
32  double ftmp[2];
33  uint64_t tmp[1];
34  DECLARE_VAR_ALL64;
35 
36  __asm__ volatile (
37  "dli %[tmp0], 0x08 \n\t"
38  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
39  MMI_LDC1(%[ftmp1], %[srcA], 0x08)
40 
41  "1: \n\t"
42  MMI_SDC1(%[ftmp0], %[src], 0x00)
43  MMI_SDC1(%[ftmp1], %[src], 0x08)
44  PTR_ADDU "%[src], %[src], %[stride] \n\t"
45  MMI_SDC1(%[ftmp0], %[src], 0x00)
46  MMI_SDC1(%[ftmp1], %[src], 0x08)
47 
48  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
49  PTR_ADDU "%[src], %[src], %[stride] \n\t"
50  "bnez %[tmp0], 1b \n\t"
51  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
52  [tmp0]"=&r"(tmp[0]),
53  RESTRICT_ASM_ALL64
54  [src]"+&r"(src)
55  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
56  : "memory"
57  );
58 }
59 
61 {
62  uint64_t tmp[3];
63  mips_reg addr[2];
64 
65  __asm__ volatile (
66  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
67  PTR_ADDU "%[addr1], %[src], $0 \n\t"
68  "dli %[tmp2], 0x08 \n\t"
69  "1: \n\t"
70  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
71  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
72  "swl %[tmp1], 0x07(%[addr1]) \n\t"
73  "swr %[tmp1], 0x00(%[addr1]) \n\t"
74  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
75  "swr %[tmp1], 0x08(%[addr1]) \n\t"
76  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
77  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
78  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
79  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
80  "swl %[tmp1], 0x07(%[addr1]) \n\t"
81  "swr %[tmp1], 0x00(%[addr1]) \n\t"
82  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
83  "swr %[tmp1], 0x08(%[addr1]) \n\t"
84  "daddi %[tmp2], %[tmp2], -0x01 \n\t"
85  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
86  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
87  "bnez %[tmp2], 1b \n\t"
88  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
89  [tmp2]"=&r"(tmp[2]),
90  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
91  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
92  [ff_pb_1]"r"(ff_pb_1)
93  : "memory"
94  );
95 }
96 
98 {
99  uint64_t tmp[4];
100  mips_reg addr[2];
101 
102  __asm__ volatile (
103  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
104  "dli %[tmp0], 0x08 \n\t"
105  "xor %[tmp3], %[tmp3], %[tmp3] \n\t"
106  "1: \n\t"
107  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
108  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
109  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
110  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
111  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
112  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
113  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
114  "bnez %[tmp0], 1b \n\t"
115 
116  "dli %[tmp0], 0x08 \n\t"
117  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
118  "2: \n\t"
119  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
120  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
121  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
122  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
123  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
124  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
125  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
126  "bnez %[tmp0], 2b \n\t"
127 
128  "daddiu %[tmp3], %[tmp3], 0x10 \n\t"
129  "dsra %[tmp3], 0x05 \n\t"
130  "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t"
131  PTR_ADDU "%[addr0], %[src], $0 \n\t"
132  "dli %[tmp0], 0x08 \n\t"
133  "3: \n\t"
134  "swl %[tmp2], 0x07(%[addr0]) \n\t"
135  "swr %[tmp2], 0x00(%[addr0]) \n\t"
136  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
137  "swr %[tmp2], 0x08(%[addr0]) \n\t"
138  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
139  "swl %[tmp2], 0x07(%[addr0]) \n\t"
140  "swr %[tmp2], 0x00(%[addr0]) \n\t"
141  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
142  "swr %[tmp2], 0x08(%[addr0]) \n\t"
143  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
144  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
145  "bnez %[tmp0], 3b \n\t"
146  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
147  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
148  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
149  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
150  [ff_pb_1]"r"(ff_pb_1)
151  : "memory"
152  );
153 }
154 
/**
 * 8x8 luma "top DC" intra prediction for 8-bit samples (Loongson MMI asm).
 *
 * Stage 1 loads 8 bytes each from src-stride-1, src-stride and src-stride+1,
 * combines them per lane as (a + 2*b + c + 2) >> 2, sums the 8 packed result
 * bytes, and forms dc = ((sum + 4) >> 3) * ff_pb_1 (32-bit multiply).
 * Stage 2 replicates dc into both 32-bit halves of one 64-bit register and
 * stores that register to 8 consecutive rows starting at src.
 *
 * @param src          first byte of the block to fill; the row at
 *                     src - stride is read
 * @param has_topleft  when zero, lane 0 of the src-stride-1 vector is
 *                     replaced by lane 0 of the src-stride vector
 * @param has_topright when zero, the upper half of the src-stride+1 vector
 *                     is reshuffled with selector 0xa4
 * @param stride       byte distance between consecutive rows
 */
155 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
156  int has_topright, ptrdiff_t stride)
157 {
158  uint32_t dc;
159  double ftmp[11];
 /* Only tmp[0] and tmp[1] are bound to asm operands in this function. */
160  mips_reg tmp[3];
161  DECLARE_VAR_ALL64;
162  DECLARE_VAR_ADDRT;
163
 /* Stage 1: filter the row above the block and reduce it to one DC value. */
164  __asm__ volatile (
 /* ftmp0 = 0, used as the second operand of every unpack below */
165  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp10 / ftmp9 / ftmp8 = 8 bytes at src-stride-1 / src-stride / src-stride+1 */
166  MMI_ULDC1(%[ftmp10], %[srcA], 0x00)
167  MMI_ULDC1(%[ftmp9], %[src0], 0x00)
168  MMI_ULDC1(%[ftmp8], %[src1], 0x00)
169
 /* Unpack each load against zero into a low and a high half:
  * (ftmp7,ftmp6) from srcA, (ftmp5,ftmp4) from src0, (ftmp3,ftmp2) from src1.
  * Presumably this widens the bytes to 16-bit lanes. */
170  "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t"
171  "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t"
172  "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t"
173  "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t"
174  "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t"
175  "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t"
 /* Without a top-left neighbour, substitute lane 0 of the centre vector. */
176  "bnez %[has_topleft], 1f \n\t"
177  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
178
179  "1: \n\t"
 /* Without a top-right neighbour, reshuffle the upper right-hand lanes;
  * 0xa4 encodes the 2-bit indices 0,1,2,2 from the low end. */
180  "bnez %[has_topright], 2f \n\t"
181  "dli %[tmp0], 0xa4 \n\t"
182  "mtc1 %[tmp0], %[ftmp1] \n\t"
183  "pshufh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
184
185  "2: \n\t"
 /* ftmp1 = 2 is the shift count for the psrah instructions below. */
186  "dli %[tmp0], 0x02 \n\t"
187  "mtc1 %[tmp0], %[ftmp1] \n\t"
 /* (ftmp7,ftmp6) = (a + b*ff_pw_2 + c + ff_pw_2) >> 2 per lane */
188  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t"
189  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t"
190  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
191  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
192  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
193  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
194  "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t"
195  "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t"
196  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
197  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* Pack both halves into ftmp9, sum its bytes into ftmp10, move to tmp1. */
198  "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t"
199  "biadd %[ftmp10], %[ftmp9] \n\t"
200  "mfc1 %[tmp1], %[ftmp10] \n\t"
 /* dc = ((sum + 4) >> 3) * ff_pb_1 */
201  "addiu %[tmp1], %[tmp1], 0x04 \n\t"
202  "srl %[tmp1], %[tmp1], 0x03 \n\t"
203  "mul %[dc], %[tmp1], %[ff_pb_1] \n\t"
204  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
205  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
206  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
207  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
208  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
209  [ftmp10]"=&f"(ftmp[10]),
210  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
211  RESTRICT_ASM_ALL64
212  [dc]"=r"(dc)
213  : [srcA]"r"((mips_reg)(src-stride-1)),
214  [src0]"r"((mips_reg)(src-stride)),
215  [src1]"r"((mips_reg)(src-stride+1)),
216  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
217  [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
218  : "memory"
219  );
220
 /* Stage 2: fill the block. dc is passed in a floating-point register. */
221  __asm__ volatile (
 /* tmp0 = loop counter: 2 iterations, 4 row stores each */
222  "dli %[tmp0], 0x02 \n\t"
 /* ftmp0 = dc duplicated into both 32-bit words */
223  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
224
225  "1: \n\t"
 /* Store at src and src+stride, then advance src by two rows. */
226  MMI_SDC1(%[ftmp0], %[src], 0x00)
227  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
228  PTR_ADDU "%[src], %[src], %[stride] \n\t"
229  PTR_ADDU "%[src], %[src], %[stride] \n\t"
230  MMI_SDC1(%[ftmp0], %[src], 0x00)
231  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
232
233  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
234  PTR_ADDU "%[src], %[src], %[stride] \n\t"
235  PTR_ADDU "%[src], %[src], %[stride] \n\t"
236  "bnez %[tmp0], 1b \n\t"
237  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
238  RESTRICT_ASM_ALL64
239  RESTRICT_ASM_ADDRT
 /* src is read-write: the loop advances it by 8 rows in total */
240  [src]"+&r"(src)
241  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
242  : "memory"
243  );
244 }
245 
/**
 * 8x8 luma DC intra prediction for 8-bit samples (Loongson MMI asm).
 *
 * The left column is filtered in C (l0..l7) and summed into dc1. The row
 * above the block is filtered in the first asm statement and its 8 result
 * bytes are summed into dc2. The fill value is
 * ((dc1 + dc2 + 8) >> 4) * 0x01010101, which the second asm statement stores
 * to 8 consecutive rows starting at src.
 *
 * @param src          first byte of the block to fill; bytes at src[-1 + k*stride]
 *                     (k = -1..7) and around src - stride are read
 * @param has_topleft  selects src[-1-stride] (non-zero) or src[-1] (zero) as
 *                     the first tap of l0, and controls the lane-0
 *                     substitution in the top-row filter
 * @param has_topright when zero, lane 3 of ftmp8 is replaced again from the
 *                     centre vector (see the asm below)
 * @param stride       byte distance between consecutive rows
 */
246 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
247  ptrdiff_t stride)
248 {
249  uint32_t dc, dc1, dc2;
250  double ftmp[14];
251  mips_reg tmp[1];
252
 /* Left-column taps, each (prev + 2*cur + next + 2) >> 2.
  * l7 uses row 7 as its own "next" neighbour. */
253  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
254  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
255  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
256  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
257  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
258  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
259  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
260  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
261
262  DECLARE_VAR_ALL64;
263  DECLARE_VAR_ADDRT;
264
 /* Stage 1: filter the row above the block and sum it into dc2. */
265  __asm__ volatile (
 /* ftmp4 / ftmp5 / ftmp6 = 8 bytes at src-stride-1 / src-stride / src-stride+1 */
266  MMI_ULDC1(%[ftmp4], %[srcA], 0x00)
267  MMI_ULDC1(%[ftmp5], %[src0], 0x00)
268  MMI_ULDC1(%[ftmp6], %[src1], 0x00)
269  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* tmp0 = 3 becomes the pshufh selector in ftmp1 (moved in below) */
270  "dli %[tmp0], 0x03 \n\t"
 /* Unpack against zero: (ftmp7,ftmp8) from srcA, (ftmp9,ftmp10) from src0,
  * (ftmp11,ftmp12) from src1. */
271  "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t"
272  "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
273  "mtc1 %[tmp0], %[ftmp1] \n\t"
274  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
276  "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
277  "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
 /* Shuffle ftmp8 and ftmp12 with selector 3, then insert the results into
  * lane 3 of the other register. NOTE(review): this looks like an exchange
  * of the top lanes of ftmp8 and ftmp12, which would leave the per-lane sum
  * a + c unchanged - confirm against the MMI instruction reference. */
278  "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t"
279  "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t"
280  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
281  "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t"
 /* Without a top-left neighbour, substitute lane 0 of the centre vector. */
282  "bnez %[has_topleft], 1f \n\t"
283  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
284
285  "1: \n\t"
 /* Without a top-right neighbour, lane 3 of ftmp8 is taken from the centre
  * vector's upper half (ftmp10) instead. */
286  "bnez %[has_topright], 2f \n\t"
287  "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t"
288  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
289
290  "2: \n\t"
 /* ftmp1 = 2 (shift count); ftmp2 = ftmp1 shuffled with the all-zero
  * selector ftmp0, used below both as the multiplier and as the rounding
  * addend (presumably 2 in every lane). */
291  "dli %[tmp0], 0x02 \n\t"
292  "mtc1 %[tmp0], %[ftmp1] \n\t"
293  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
 /* (ftmp7,ftmp8) = (a + b*ftmp2 + c + ftmp2) >> 2 per lane */
294  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
295  "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t"
296  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
297  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
299  "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
300  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
301  "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
302  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
303  "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
 /* Pack to bytes, sum the bytes, and hand the sum back as dc2. */
304  "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t"
305  "biadd %[ftmp4], %[ftmp5] \n\t"
306  "mfc1 %[dc2], %[ftmp4] \n\t"
307  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
308  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
309  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
310  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
311  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
312  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
313  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
314  [tmp0]"=&r"(tmp[0]),
315  RESTRICT_ASM_ALL64
316  [dc2]"=r"(dc2)
317  : [srcA]"r"((mips_reg)(src-stride-1)),
318  [src0]"r"((mips_reg)(src-stride)),
319  [src1]"r"((mips_reg)(src-stride+1)),
320  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
321  : "memory"
322  );
323
 /* Average the 16 filtered neighbours with rounding and replicate the
  * result into all four bytes of dc. */
324  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
325  dc = ((dc1+dc2+8)>>4)*0x01010101U;
326
 /* Stage 2: fill the block. dc is passed in a floating-point register. */
327  __asm__ volatile (
 /* tmp0 = loop counter: 2 iterations, 4 row stores each */
328  "dli %[tmp0], 0x02 \n\t"
 /* ftmp0 = dc duplicated into both 32-bit words */
329  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
330
331  "1: \n\t"
 /* Store at src and src+stride, then advance src by two rows. */
332  MMI_SDC1(%[ftmp0], %[src], 0x00)
333  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
334  PTR_ADDU "%[src], %[src], %[stride] \n\t"
335  PTR_ADDU "%[src], %[src], %[stride] \n\t"
336  MMI_SDC1(%[ftmp0], %[src], 0x00)
337  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
338
339  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
340  PTR_ADDU "%[src], %[src], %[stride] \n\t"
341  PTR_ADDU "%[src], %[src], %[stride] \n\t"
342  "bnez %[tmp0], 1b \n\t"
343  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
344  RESTRICT_ASM_ALL64
345  RESTRICT_ASM_ADDRT
 /* src is read-write: the loop advances it by 8 rows in total */
346  [src]"+&r"(src)
347  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
348  : "memory"
349  );
350 }
351 
352 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
353  int has_topright, ptrdiff_t stride)
354 {
355  double ftmp[12];
356  mips_reg tmp[1];
357  DECLARE_VAR_ALL64;
358 
359  __asm__ volatile (
360  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
361  MMI_LDC1(%[ftmp3], %[srcA], 0x00)
362  MMI_LDC1(%[ftmp4], %[src0], 0x00)
363  MMI_LDC1(%[ftmp5], %[src1], 0x00)
364  "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
365  "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
366  "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
367  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
368  "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
369  "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t"
370  "bnez %[has_topleft], 1f \n\t"
371  "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
372 
373  "1: \n\t"
374  "bnez %[has_topright], 2f \n\t"
375  "dli %[tmp0], 0xa4 \n\t"
376  "mtc1 %[tmp0], %[ftmp1] \n\t"
377  "pshufh %[ftmp11], %[ftmp11], %[ftmp1] \n\t"
378 
379  "2: \n\t"
380  "dli %[tmp0], 0x02 \n\t"
381  "mtc1 %[tmp0], %[ftmp1] \n\t"
382  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
383  "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
384  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
385  "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
386  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
387  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
388  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
389  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
390  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
391  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
392  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
393  "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t"
394  MMI_SDC1(%[ftmp4], %[src], 0x00)
395  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
396  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
397  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
398  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
399  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
400  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
401  [tmp0]"=&r"(tmp[0]),
402  RESTRICT_ASM_ALL64
403  [src]"=r"(src)
404  : [srcA]"r"((mips_reg)(src-stride-1)),
405  [src0]"r"((mips_reg)(src-stride)),
406  [src1]"r"((mips_reg)(src-stride+1)),
407  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
408  : "memory"
409  );
410 
411  __asm__ volatile (
412  "dli %[tmp0], 0x02 \n\t"
413 
414  "1: \n\t"
415  MMI_SDC1(%[ftmp0], %[src], 0x00)
416  PTR_ADDU "%[src], %[src], %[stride] \n\t"
417  MMI_SDC1(%[ftmp0], %[src], 0x00)
418  PTR_ADDU "%[src], %[src], %[stride] \n\t"
419  MMI_SDC1(%[ftmp0], %[src], 0x00)
420  PTR_ADDU "%[src], %[src], %[stride] \n\t"
421  MMI_SDC1(%[ftmp0], %[src], 0x00)
422 
423  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
424  PTR_ADDU "%[src], %[src], %[stride] \n\t"
425  "bnez %[tmp0], 1b \n\t"
426  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
427  RESTRICT_ASM_ALL64
428  [src]"+&r"(src)
429  : [stride]"r"((mips_reg)stride)
430  : "memory"
431  );
432 }
433 
/**
 * 4x4 DC intra prediction for 8-bit samples (MIPS/Loongson asm).
 *
 * dc is the rounded average of the 4 bytes above the block
 * (src[-stride .. 3-stride]) and the 4 bytes to its left
 * (src[-1 + k*stride], k = 0..3). The asm multiplies dc by ff_pb_1 and
 * stores the product with MMI_SWX at src + 0, src + stride, src + 2*stride
 * and src + 3*stride.
 *
 * @param src      first byte of the block to fill
 * @param topright not referenced by this function
 * @param stride   byte distance between consecutive rows
 */
434 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
435  ptrdiff_t stride)
436 {
 /* (sum of 8 neighbours + 4) >> 3 */
437  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
438  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
439  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
440  uint64_t tmp[2];
441  mips_reg addr[1];
442  DECLARE_VAR_ADDRT;
443
444  __asm__ volatile (
 /* tmp0 = dc; tmp1 = dc * ff_pb_1 */
445  PTR_ADDU "%[tmp0], %[dc], $0 \n\t"
446  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
 /* addr0 is the running row offset: 0, stride, 2*stride, 3*stride */
447  "xor %[addr0], %[addr0], %[addr0] \n\t"
448  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
449  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
450  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
451  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
452  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
453  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
454  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
455  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
456  RESTRICT_ASM_ADDRT
457  [addr0]"=&r"(addr[0])
458  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
459  [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1)
460  : "memory"
461  );
462 }
463 
465 {
466  uint64_t tmp[2];
467  mips_reg addr[2];
468 
469  __asm__ volatile (
470  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
471  PTR_ADDU "%[addr1], %[src], $0 \n\t"
472  "ldl %[tmp0], 0x07(%[addr0]) \n\t"
473  "ldr %[tmp0], 0x00(%[addr0]) \n\t"
474  "dli %[tmp1], 0x04 \n\t"
475  "1: \n\t"
476  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
477  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
478  PTR_ADDU "%[addr1], %[stride] \n\t"
479  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
480  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
481  "daddi %[tmp1], -0x01 \n\t"
482  PTR_ADDU "%[addr1], %[stride] \n\t"
483  "bnez %[tmp1], 1b \n\t"
484  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
485  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
486  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
487  : "memory"
488  );
489 }
490 
492 {
493  uint64_t tmp[3];
494  mips_reg addr[2];
495 
496  __asm__ volatile (
497  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
498  PTR_ADDU "%[addr1], %[src], $0 \n\t"
499  "dli %[tmp0], 0x04 \n\t"
500  "1: \n\t"
501  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
502  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
503  "swl %[tmp2], 0x07(%[addr1]) \n\t"
504  "swr %[tmp2], 0x00(%[addr1]) \n\t"
505  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
506  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
507  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
508  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
509  "swl %[tmp2], 0x07(%[addr1]) \n\t"
510  "swr %[tmp2], 0x00(%[addr1]) \n\t"
511  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
512  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
513  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
514  "bnez %[tmp0], 1b \n\t"
515  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
516  [tmp2]"=&r"(tmp[2]),
517  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
518  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
519  [ff_pb_1]"r"(ff_pb_1)
520  : "memory"
521  );
522 }
523 
525 {
526  double ftmp[4];
527  uint64_t tmp[1];
528  mips_reg addr[1];
529  DECLARE_VAR_ALL64;
530 
531  __asm__ volatile (
532  "dli %[tmp0], 0x02 \n\t"
533  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
534  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
535  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
536  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
537  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
538  "biadd %[ftmp2], %[ftmp2] \n\t"
539  "biadd %[ftmp3], %[ftmp3] \n\t"
540  "mtc1 %[tmp0], %[ftmp1] \n\t"
541  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
542  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
543  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
544  "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
545  "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
546  "mtc1 %[tmp0], %[ftmp1] \n\t"
547  "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
548  "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
549  "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
550  MMI_SDC1(%[ftmp1], %[src], 0x00)
551  PTR_ADDU "%[src], %[src], %[stride] \n\t"
552  MMI_SDC1(%[ftmp1], %[src], 0x00)
553  PTR_ADDU "%[src], %[src], %[stride] \n\t"
554  MMI_SDC1(%[ftmp1], %[src], 0x00)
555  PTR_ADDU "%[src], %[src], %[stride] \n\t"
556  MMI_SDC1(%[ftmp1], %[src], 0x00)
557  PTR_ADDU "%[src], %[src], %[stride] \n\t"
558  MMI_SDC1(%[ftmp1], %[src], 0x00)
559  PTR_ADDU "%[src], %[src], %[stride] \n\t"
560  MMI_SDC1(%[ftmp1], %[src], 0x00)
561  PTR_ADDU "%[src], %[src], %[stride] \n\t"
562  MMI_SDC1(%[ftmp1], %[src], 0x00)
563  PTR_ADDU "%[src], %[src], %[stride] \n\t"
564  MMI_SDC1(%[ftmp1], %[src], 0x00)
565  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
566  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
567  [tmp0]"=&r"(tmp[0]),
568  RESTRICT_ASM_ALL64
569  [addr0]"=&r"(addr[0]),
570  [src]"+&r"(src)
571  : [stride]"r"((mips_reg)stride)
572  : "memory"
573  );
574 }
575 
577 {
578  double ftmp[5];
579  mips_reg addr[7];
580 
581  __asm__ volatile (
582  "negu %[addr0], %[stride] \n\t"
583  PTR_ADDU "%[addr0], %[addr0], %[src] \n\t"
584  PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t"
585  "lbu %[addr2], 0x00(%[addr0]) \n\t"
586  PTR_ADDU "%[addr3], $0, %[addr2] \n\t"
587  PTR_ADDIU "%[addr0], 0x01 \n\t"
588  "lbu %[addr2], 0x00(%[addr1]) \n\t"
589  PTR_ADDU "%[addr4], $0, %[addr2] \n\t"
590  PTR_ADDIU "%[addr1], 0x01 \n\t"
591  "lbu %[addr2], 0x00(%[addr0]) \n\t"
592  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
593  PTR_ADDIU "%[addr0], 0x01 \n\t"
594  "lbu %[addr2], 0x00(%[addr1]) \n\t"
595  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
596  PTR_ADDIU "%[addr1], 0x01 \n\t"
597  "lbu %[addr2], 0x00(%[addr0]) \n\t"
598  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
599  PTR_ADDIU "%[addr0], 0x01 \n\t"
600  "lbu %[addr2], 0x00(%[addr1]) \n\t"
601  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
602  PTR_ADDIU "%[addr1], 0x01 \n\t"
603  "lbu %[addr2], 0x00(%[addr0]) \n\t"
604  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
605  PTR_ADDIU "%[addr0], 0x01 \n\t"
606  "lbu %[addr2], 0x00(%[addr1]) \n\t"
607  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
608  PTR_ADDIU "%[addr1], 0x01 \n\t"
609  "dli %[addr2], -0x01 \n\t"
610  PTR_ADDU "%[addr2], %[addr2], %[src] \n\t"
611  "lbu %[addr1], 0x00(%[addr2]) \n\t"
612  PTR_ADDU "%[addr5], $0, %[addr1] \n\t"
613  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
614  "lbu %[addr1], 0x00(%[addr2]) \n\t"
615  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
616  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
617  "lbu %[addr1], 0x00(%[addr2]) \n\t"
618  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
619  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
620  "lbu %[addr1], 0x00(%[addr2]) \n\t"
621  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
622  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
623  "lbu %[addr1], 0x00(%[addr2]) \n\t"
624  PTR_ADDU "%[addr6], $0, %[addr1] \n\t"
625  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
626  "lbu %[addr1], 0x00(%[addr2]) \n\t"
627  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
628  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
629  "lbu %[addr1], 0x00(%[addr2]) \n\t"
630  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
631  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
632  "lbu %[addr1], 0x00(%[addr2]) \n\t"
633  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
634  PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t"
635  PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t"
636  PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t"
637  PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t"
638  PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t"
639  PTR_SRL "%[addr3], 0x03 \n\t"
640  PTR_SRL "%[addr4], 0x02 \n\t"
641  PTR_SRL "%[addr1], 0x02 \n\t"
642  PTR_SRL "%[addr2], 0x03 \n\t"
643  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
644  "dmtc1 %[addr3], %[ftmp1] \n\t"
645  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
646  "dmtc1 %[addr4], %[ftmp2] \n\t"
647  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
648  "dmtc1 %[addr1], %[ftmp3] \n\t"
649  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
650  "dmtc1 %[addr2], %[ftmp4] \n\t"
651  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
652  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
653  "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t"
654  PTR_ADDU "%[addr0], $0, %[src] \n\t"
655  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
656  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
657  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
658  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
659  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
660  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
661  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
662  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
663  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
664  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
665  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
666  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
667  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
668  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
669  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
670  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
671  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
672  [ftmp4]"=&f"(ftmp[4]),
673  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
674  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
675  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
676  [addr6]"=&r"(addr[6])
677  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
678  : "memory"
679  );
680 }
681 
683 {
684  double ftmp[1];
685  uint64_t tmp[1];
686  DECLARE_VAR_ALL64;
687 
688  __asm__ volatile (
689  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
690  "dli %[tmp0], 0x04 \n\t"
691 
692  "1: \n\t"
693  MMI_SDC1(%[ftmp0], %[src], 0x00)
694  PTR_ADDU "%[src], %[src], %[stride] \n\t"
695  MMI_SDC1(%[ftmp0], %[src], 0x00)
696  PTR_ADDU "%[src], %[src], %[stride] \n\t"
697  MMI_SDC1(%[ftmp0], %[src], 0x00)
698  PTR_ADDU "%[src], %[src], %[stride] \n\t"
699  MMI_SDC1(%[ftmp0], %[src], 0x00)
700 
701  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
702  PTR_ADDU "%[src], %[src], %[stride] \n\t"
703  "bnez %[tmp0], 1b \n\t"
704  : [ftmp0]"=&f"(ftmp[0]),
705  [tmp0]"=&r"(tmp[0]),
706  RESTRICT_ASM_ALL64
707  [src]"+&r"(src)
708  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
709  : "memory"
710  );
711 }
712 
714 {
715  uint64_t tmp[3];
716  mips_reg addr[2];
717 
718  __asm__ volatile (
719  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
720  PTR_ADDU "%[addr1], %[src], $0 \n\t"
721  "dli %[tmp0], 0x08 \n\t"
722  "1: \n\t"
723  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
724  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
725  "swl %[tmp2], 0x07(%[addr1]) \n\t"
726  "swr %[tmp2], 0x00(%[addr1]) \n\t"
727  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
728  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
729  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
730  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
731  "swl %[tmp2], 0x07(%[addr1]) \n\t"
732  "swr %[tmp2], 0x00(%[addr1]) \n\t"
733  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
734  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
735  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
736  "bnez %[tmp0], 1b \n\t"
737  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
738  [tmp2]"=&r"(tmp[2]),
739  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
740  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
741  [ff_pb_1]"r"(ff_pb_1)
742  : "memory"
743  );
744 }
745 
/**
 * 16x16 plane intra prediction for 8-bit samples (Loongson MMI asm), with
 * run-time selection between three gradient scalings.
 *
 * Outline of the asm below:
 *  1. Weighted sum over the row above the block (bytes at src-stride-1.. and
 *     src-stride+8..), reduced into lane 0 of ftmp5.
 *  2. Weighted sum over the column left of the block (rows -1..6 and 8..15
 *     of src-1), reduced into lane 0 of ftmp6. tmp5 is set to
 *     (src[15-stride] + src[-1+15*stride] + 1) << 4.
 *  3. Both sums are sign-extended from 16 bits into tmp0 / tmp1 and scaled
 *     by the svq3, rv40 or default rule.
 *  4. tmp5 -= 7 * (tmp0 + tmp1); the three values are broadcast and a
 *     16-iteration loop writes 16 bytes per row.
 *
 * @param src    first byte of the block to fill
 * @param stride byte distance between consecutive rows
 * @param svq3   non-zero selects the svq3 scaling (tested first)
 * @param rv40   non-zero selects the rv40 scaling when svq3 is zero
 */
746 static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
747  const int svq3, const int rv40)
748 {
 /* ftmp[0..9] are bound below; ftmp[10] is not. */
749  double ftmp[11];
750  uint64_t tmp[6];
751  mips_reg addr[1];
752  DECLARE_VAR_ALL64;
753
754  __asm__ volatile(
 /* --- 1. Top row: addr0 = src - stride; ftmp4 = 32 (shift count) --- */
755  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
756  "dli %[tmp0], 0x20 \n\t"
757  "dmtc1 %[tmp0], %[ftmp4] \n\t"
 /* ftmp0 = 8 bytes at addr0-1, ftmp2 = 8 bytes at addr0+8 */
758  MMI_ULDC1(%[ftmp0], %[addr0], -0x01)
759  MMI_ULDC1(%[ftmp2], %[addr0], 0x08)
 /* ftmp1 / ftmp3 = the upper 4 bytes of ftmp0 / ftmp2 shifted down */
760  "dsrl %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
761  "dsrl %[ftmp3], %[ftmp2], %[ftmp4] \n\t"
762  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
 /* Unpack the low 4 bytes of each register against zero. */
763  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
764  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
765  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
766  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* Per-lane weights; presumably -8..-5, -4..-1, 1..4 and 5..8 judging by
  * the constant names - confirm against their definitions. */
767  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
768  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
769  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
770  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
771  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
772  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
773  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Two shuffle-and-add steps (selectors 0x0e, 0x01) fold the four lanes;
  * the result is kept in ftmp5. */
774  "dli %[tmp0], 0x0e \n\t"
775  "dmtc1 %[tmp0], %[ftmp4] \n\t"
776  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
777  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
778  "dli %[tmp0], 0x01 \n\t"
779  "dmtc1 %[tmp0], %[ftmp4] \n\t"
780  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
781  "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t"
782
 /* --- 2. Left column: addr0 = src - 1 - stride (row -1) --- */
783  PTR_ADDIU "%[addr0], %[src], -0x01 \n\t"
784  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
 /* tmp2 = row -1 byte; tmp5 = byte 16 to its right, i.e. src[15-stride] */
785  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
786  "lbu %[tmp5], 0x10(%[addr0]) \n\t"
 /* Rows 0, 1, 2 into tmp3, tmp4, tmp0 */
787  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
788  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
789  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
790  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
791  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
792  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
 /* Pack the four bytes at 16-bit spacing and move them to ftmp0. */
793  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
794  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
795  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
796  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
797  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
798  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
799  "dmtc1 %[tmp2], %[ftmp0] \n\t"
800
 /* Rows 3..6 -> ftmp1 */
801  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
802  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
803  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
804  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
805  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
806  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
807  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
808  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
809  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
810  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
811  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
812  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
813  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
814  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
815  "dmtc1 %[tmp2], %[ftmp1] \n\t"
816
 /* Two row steps here: row 7 is skipped. Rows 8..11 -> ftmp2 */
817  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
818  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
819  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
820  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
821  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
822  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
823  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
824  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
825  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
826  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
827  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
828  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
829  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
830  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
831  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
832  "dmtc1 %[tmp2], %[ftmp2] \n\t"
833
 /* Rows 12..15 -> ftmp3 */
834  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
835  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
836  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
837  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
838  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
839  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
840  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
841  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
 /* tmp5 = (src[15-stride] + row-15 left byte + 1) << 4, taken before tmp0
  * is shifted for packing */
842  "daddu %[tmp5], %[tmp5], %[tmp0] \n\t"
843  "daddiu %[tmp5], %[tmp5], 0x01 \n\t"
844  "dsll %[tmp5], %[tmp5], 0x04 \n\t"
845
846  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
847  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
848  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
849  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
850  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
851  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
852  "dmtc1 %[tmp2], %[ftmp3] \n\t"
853
 /* Same weighting and lane folding as for the top row; result in ftmp6. */
854  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
855  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
856  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
857  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
858  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
859  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
860  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
861  "dli %[tmp0], 0x0e \n\t"
862  "dmtc1 %[tmp0], %[ftmp4] \n\t"
863  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
864  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
865
866  "dli %[tmp0], 0x01 \n\t"
867  "dmtc1 %[tmp0], %[ftmp4] \n\t"
868  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
869  "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
870
 /* --- 3. tmp0 / tmp1 = low 16 bits of ftmp5 / ftmp6, sign-extended by the
  * shift-left-48 / arithmetic-shift-right-48 pair --- */
871  "dmfc1 %[tmp0], %[ftmp5] \n\t"
872  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
873  "dsra %[tmp0], %[tmp0], 0x30 \n\t"
874  "dmfc1 %[tmp1], %[ftmp6] \n\t"
875  "dsll %[tmp1], %[tmp1], 0x30 \n\t"
876  "dsra %[tmp1], %[tmp1], 0x30 \n\t"
877
 /* svq3: x = x / 4 * 5 / 16 for both values, then swap tmp0 and tmp1. */
878  "beqz %[svq3], 1f \n\t"
879  "dli %[tmp2], 0x04 \n\t"
880  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
881  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
882  "dli %[tmp2], 0x05 \n\t"
883  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
884  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
885  "dli %[tmp2], 0x10 \n\t"
886  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
887  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
888  "daddu %[tmp2], %[tmp0], $0 \n\t"
889  "daddu %[tmp0], %[tmp1], $0 \n\t"
890  "daddu %[tmp1], %[tmp2], $0 \n\t"
891  "b 2f \n\t"
892
 /* rv40: x = (x + (x >> 2)) >> 4. The numeric label 1 is reused; each 1f
  * refers to the next following 1: label. */
893  "1: \n\t"
894  "beqz %[rv40], 1f \n\t"
895  "dsra %[tmp2], %[tmp0], 0x02 \n\t"
896  "daddu %[tmp0], %[tmp0], %[tmp2] \n\t"
897  "dsra %[tmp2], %[tmp1], 0x02 \n\t"
898  "daddu %[tmp1], %[tmp1], %[tmp2] \n\t"
899  "dsra %[tmp0], %[tmp0], 0x04 \n\t"
900  "dsra %[tmp1], %[tmp1], 0x04 \n\t"
901  "b 2f \n\t"
902
 /* default: x = (5 * x + 32) >> 6 */
903  "1: \n\t"
904  "dli %[tmp2], 0x05 \n\t"
905  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
906  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
907  "daddiu %[tmp0], %[tmp0], 0x20 \n\t"
908  "daddiu %[tmp1], %[tmp1], 0x20 \n\t"
909  "dsra %[tmp0], %[tmp0], 0x06 \n\t"
910  "dsra %[tmp1], %[tmp1], 0x06 \n\t"
911
 /* --- 4. tmp5 -= 7 * (tmp0 + tmp1) --- */
912  "2: \n\t"
913  "daddu %[tmp3], %[tmp0], %[tmp1] \n\t"
914  "dli %[tmp2], 0x07 \n\t"
915  "dmul %[tmp3], %[tmp3], %[tmp2] \n\t"
916  "dsubu %[tmp5], %[tmp5], %[tmp3] \n\t"
917
 /* Shuffle with the all-zero selector ftmp4 (presumably a lane-0
  * broadcast): ftmp0 <- tmp0, ftmp5 <- tmp1, ftmp6 <- tmp5.
  * ftmp7 = 5 is the shift count used in the loop. */
918  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
919  "dmtc1 %[tmp0], %[ftmp0] \n\t"
920  "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
921  "dmtc1 %[tmp1], %[ftmp5] \n\t"
922  "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
923  "dmtc1 %[tmp5], %[ftmp6] \n\t"
924  "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
925  "dli %[tmp0], 0x05 \n\t"
926  "dmtc1 %[tmp0], %[ftmp7] \n\t"
 /* ftmp1..ftmp4 = ftmp0 times the per-column constants ff_pw_0to3,
  * ff_pw_4to7, ff_pw_8tob and ff_pw_ctof (the last three are moved in from
  * general-purpose operands first). */
927  "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t"
928  "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t"
929  "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
930  "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t"
931  "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
932  "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t"
933  "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
934
 /* Row loop: tmp0 = 16 rows, addr0 = current output row. */
935  "dli %[tmp0], 0x10 \n\t"
936  PTR_ADDU "%[addr0], %[src], $0 \n\t"
937  "1: \n\t"
 /* Bytes 0..7: (ftmp1 + ftmp6) >> 5 and (ftmp2 + ftmp6) >> 5, packed */
938  "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t"
939  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
940  "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t"
941  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
942  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
943  MMI_SDC1(%[ftmp0], %[addr0], 0x00)
944
 /* Bytes 8..15: same with ftmp3 and ftmp4 */
945  "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t"
946  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
947  "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t"
948  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
949  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
950  MMI_SDC1(%[ftmp0], %[addr0], 0x08)
951
 /* Next row: add ftmp5 to the row base and step addr0 by stride. */
952  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
953  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
954  "daddiu %[tmp0], %[tmp0], -0x01 \n\t"
955  "bnez %[tmp0], 1b \n\t"
956  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
957  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
958  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
959  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
960  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
961  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
962  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
963  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
964  RESTRICT_ASM_ALL64
965  [addr0]"=&r"(addr[0])
966  : [src]"r"(src), [stride]"r"((mips_reg)stride),
967  [svq3]"r"(svq3), [rv40]"r"(rv40),
 /* NOTE(review): the template references ff_pw_m8tom5, ff_pw_m4tom1,
  * ff_pw_1to4, ff_pw_5to8, ff_pw_0to3, ff_pw_4to7, ff_pw_8tob and ff_pw_ctof,
  * but no operand bindings for them are visible here (the listing jumps
  * from line 967 to 972) - restore them from the full file. */
972  : "memory"
973  );
974 }
975 
977 {
979 }
980 
982 {
984 }
985 
987 {
989 }
#define PTR_SUBU
Definition: asmdefs.h:50
#define PTR_ADDI
Definition: asmdefs.h:49
#define PTR_ADDIU
Definition: asmdefs.h:48
#define mips_reg
Definition: asmdefs.h:44
#define PTR_ADDU
Definition: asmdefs.h:47
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
#define PTR_SRL
Definition: asmdefs.h:54
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
uint8_t
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:981
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:97
static void pred16x16_plane_compat_mmi(uint8_t *src, int stride, const int svq3, const int rv40)
Definition: h264pred_mmi.c:746
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:576
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:60
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:491
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:524
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:713
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:434
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:352
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:986
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:976
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:682
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:246
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:30
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:464
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:155
int stride
Definition: mace.c:144
const uint64_t ff_pw_0to3
Definition: constants.c:52
const uint64_t ff_pw_4to7
Definition: constants.c:53
const uint64_t ff_pw_2
Definition: constants.c:27
const uint64_t ff_pw_8tob
Definition: constants.c:54
const uint64_t ff_pb_1
Definition: constants.c:57
const uint64_t ff_pw_m8tom5
Definition: constants.c:48
const uint64_t ff_pw_ctof
Definition: constants.c:55
const uint64_t ff_pw_m4tom1
Definition: constants.c:49
const uint64_t ff_pw_5to8
Definition: constants.c:51
const uint64_t ff_pw_1to4
Definition: constants.c:50
static uint8_t tmp[11]
Definition: aes_ctr.c:27
#define src1
Definition: h264pred.c:140
#define src0
Definition: h264pred.c:139
#define src
Definition: vp8dsp.c:255