git.sur5r.net Git - freertos/blob - FreeRTOS/Demo/T-HEAD_CB2201_CDK/csi/csi_core/include/csi-gcc/csi_simd.h

   1 /*
   2  * Copyright (C) 2017 C-SKY Microsystems Co., Ltd. All rights reserved.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /******************************************************************************
  18  * @file     csi_simd.h
  19  * @brief    CSI Single Instruction Multiple Data (SIMD) Header File for GCC.
  20  * @version  V1.0
  21  * @date     02. June 2017
  22  ******************************************************************************/
  23
  24 #ifndef _CSI_SIMD_H_
  25 #define _CSI_SIMD_H_
  26
  27
  28 /**
  29   \brief   Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16]
  30            of val2 levitated with the val3.
  31   \details Combine a halfword from one register with a halfword from another register.
  32            The second argument can be left-shifted before extraction of the halfword.
  33   \param [in]    val1   first 16-bit operands
  34   \param [in]    val2   second 16-bit operands
  35   \param [in]    val3   value for left-shifting val2. Value range [0..31].
  36   \return               the combination of halfwords.
  37   \remark
  38                  res[15:0]  = val1[15:0]              \n
  39                  res[31:16] = val2[31:16] << val3
  40  */
  41 __ALWAYS_INLINE uint32_t __PKHBT(uint32_t val1, uint32_t val2, uint32_t val3)
  42 {
  43     return ((((int32_t)(val1) << 0) & (int32_t)0x0000FFFF) | (((int32_t)(val2) << val3) & (int32_t)0xFFFF0000));
  44 }
  45
  46 /**
  47   \brief   Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0]
  48            of val2 right-shifted with the val3.
  49   \details Combine a halfword from one register with a halfword from another register.
  50            The second argument can be right-shifted before extraction of the halfword.
  51   \param [in]    val1   first 16-bit operands
  52   \param [in]    val2   second 16-bit operands
  53   \param [in]    val3   value for right-shifting val2. Value range [1..32].
  54   \return               the combination of halfwords.
  55   \remark
  56                  res[15:0]  = val2[15:0] >> val3        \n
  57                  res[31:16] = val1[31:16]
  58  */
  59 __ALWAYS_INLINE uint32_t __PKHTB(uint32_t val1, uint32_t val2, uint32_t val3)
  60 {
  61     return ((((int32_t)(val1) << 0) & (int32_t)0xFFFF0000) | (((int32_t)(val2) >> val3) & (int32_t)0x0000FFFF));
  62 }
  63
  64 /**
  65   \brief   Dual 16-bit signed saturate.
  66   \details This function saturates a signed value.
  67   \param [in]    x   two signed 16-bit values to be saturated.
  68   \param [in]    y   bit position for saturation, an integral constant expression in the range 1 to 16.
  69   \return        the sum of the absolute differences of the following bytes, added to the accumulation value:\n
  70                  the signed saturation of the low halfword in val1, saturated to the bit position specified in
  71                  val2 and returned in the low halfword of the return value.\n
  72                  the signed saturation of the high halfword in val1, saturated to the bit position specified in
  73                  val2 and returned in the high halfword of the return value.
  74  */
  75 __ALWAYS_INLINE uint32_t __SSAT16(int32_t x, const uint32_t y)
  76 {
  77     int32_t r = 0, s = 0;
  78
  79     r = __SSAT((((int32_t)x << 16) >> 16), y) & (int32_t)0x0000FFFF;
  80     s = __SSAT((((int32_t)x) >> 16), y) & (int32_t)0x0000FFFF;
  81
  82     return ((uint32_t)((s << 16) | (r)));
  83 }
  84
  85 /**
  86   \brief   Dual 16-bit unsigned saturate.
  87   \details This function enables you to saturate two signed 16-bit values to a selected unsigned range.
  88   \param [in]    x   two signed 16-bit values to be saturated.
  89   \param [in]    y   bit position for saturation, an integral constant expression in the range 1 to 16.
  90   \return        the saturation of the two signed 16-bit values, as non-negative values:
  91                  the saturation of the low halfword in val1, saturated to the bit position specified in
  92                  val2 and returned in the low halfword of the return value.\n
  93                  the saturation of the high halfword in val1, saturated to the bit position specified in
  94                  val2 and returned in the high halfword of the return value.
  95  */
  96 __ALWAYS_INLINE uint32_t __USAT16(uint32_t x, const uint32_t y)
  97 {
  98     int32_t r = 0, s = 0;
  99
 100     r = __IUSAT(((x << 16) >> 16), y) & 0x0000FFFF;
 101     s = __IUSAT(((x) >> 16), y) & 0x0000FFFF;
 102
 103     return ((s << 16) | (r));
 104 }
 105
 106 /**
 107   \brief   Quad 8-bit saturating addition.
 108   \details This function enables you to perform four 8-bit integer additions,
 109            saturating the results to the 8-bit signed integer range -2^7 <= x <= 2^7 - 1.
 110   \param [in]    x   first four 8-bit summands.
 111   \param [in]    y   second four 8-bit summands.
 112   \return        the saturated addition of the first byte of each operand in the first byte of the return value.\n
 113                  the saturated addition of the second byte of each operand in the second byte of the return value.\n
 114                  the saturated addition of the third byte of each operand in the third byte of the return value.\n
 115                  the saturated addition of the fourth byte of each operand in the fourth byte of the return value.\n
 116                  The returned results are saturated to the 8-bit signed integer range -2^7 <= x <= 2^7 - 1.
 117   \remark
 118                  res[7:0]   = val1[7:0]   + val2[7:0]        \n
 119                  res[15:8]  = val1[15:8]  + val2[15:8]       \n
 120                  res[23:16] = val1[23:16] + val2[23:16]      \n
 121                  res[31:24] = val1[31:24] + val2[31:24]
 122  */
 123 __ALWAYS_INLINE uint32_t __QADD8(uint32_t x, uint32_t y)
 124 {
 125     int32_t r, s, t, u;
 126
 127     r = __SSAT(((((int32_t)x << 24) >> 24) + (((int32_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
 128     s = __SSAT(((((int32_t)x << 16) >> 24) + (((int32_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
 129     t = __SSAT(((((int32_t)x <<  8) >> 24) + (((int32_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
 130     u = __SSAT(((((int32_t)x) >> 24) + (((int32_t)y) >> 24)), 8) & (int32_t)0x000000FF;
 131
 132     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 133 }
 134
 135 /**
 136   \brief   Quad 8-bit unsigned saturating addition.
 137   \details This function enables you to perform four unsigned 8-bit integer additions,
 138            saturating the results to the 8-bit unsigned integer range 0 < x < 2^8 - 1.
 139   \param [in]    x   first four 8-bit summands.
 140   \param [in]    y   second four 8-bit summands.
 141   \return        the saturated addition of the first byte of each operand in the first byte of the return value.\n
 142                  the saturated addition of the second byte of each operand in the second byte of the return value.\n
 143                  the saturated addition of the third byte of each operand in the third byte of the return value.\n
 144                  the saturated addition of the fourth byte of each operand in the fourth byte of the return value.\n
 145                  The returned results are saturated to the 8-bit signed integer range 0 <= x <= 2^8 - 1.
 146   \remark
 147                  res[7:0]   = val1[7:0]   + val2[7:0]        \n
 148                  res[15:8]  = val1[15:8]  + val2[15:8]       \n
 149                  res[23:16] = val1[23:16] + val2[23:16]      \n
 150                  res[31:24] = val1[31:24] + val2[31:24]
 151  */
 152 __ALWAYS_INLINE uint32_t __UQADD8(uint32_t x, uint32_t y)
 153 {
 154     int32_t r, s, t, u;
 155
 156     r = __IUSAT((((x << 24) >> 24) + ((y << 24) >> 24)), 8) & 0x000000FF;
 157     s = __IUSAT((((x << 16) >> 24) + ((y << 16) >> 24)), 8) & 0x000000FF;
 158     t = __IUSAT((((x <<  8) >> 24) + ((y <<  8) >> 24)), 8) & 0x000000FF;
 159     u = __IUSAT((((x) >> 24) + ((y) >> 24)), 8) & 0x000000FF;
 160
 161     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 162 }
 163
 164 /**
 165   \brief   Quad 8-bit signed addition.
 166   \details This function performs four 8-bit signed integer additions.
 167   \param [in]    x  first four 8-bit summands.
 168   \param [in]    y  second four 8-bit summands.
 169   \return        the addition of the first bytes from each operand, in the first byte of the return value.\n
 170                  the addition of the second bytes of each operand, in the second byte of the return value.\n
 171                  the addition of the third bytes of each operand, in the third byte of the return value.\n
 172                  the addition of the fourth bytes of each operand, in the fourth byte of the return value.
 173   \remark
 174                  res[7:0]   = val1[7:0]   + val2[7:0]        \n
 175                  res[15:8]  = val1[15:8]  + val2[15:8]       \n
 176                  res[23:16] = val1[23:16] + val2[23:16]      \n
 177                  res[31:24] = val1[31:24] + val2[31:24]
 178  */
 179 __ALWAYS_INLINE uint32_t __SADD8(uint32_t x, uint32_t y)
 180 {
 181     int32_t r, s, t, u;
 182
 183     r = ((((int32_t)x << 24) >> 24) + (((int32_t)y << 24) >> 24)) & (int32_t)0x000000FF;
 184     s = ((((int32_t)x << 16) >> 24) + (((int32_t)y << 16) >> 24)) & (int32_t)0x000000FF;
 185     t = ((((int32_t)x <<  8) >> 24) + (((int32_t)y <<  8) >> 24)) & (int32_t)0x000000FF;
 186     u = ((((int32_t)x) >> 24) + (((int32_t)y) >> 24)) & (int32_t)0x000000FF;
 187
 188     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 189 }
 190
 191 /**
 192   \brief   Quad 8-bit unsigned addition.
 193   \details This function performs four unsigned 8-bit integer additions.
 194   \param [in]    x  first four 8-bit summands.
 195   \param [in]    y  second four 8-bit summands.
 196   \return        the addition of the first bytes from each operand, in the first byte of the return value.\n
 197                  the addition of the second bytes of each operand, in the second byte of the return value.\n
 198                  the addition of the third bytes of each operand, in the third byte of the return value.\n
 199                  the addition of the fourth bytes of each operand, in the fourth byte of the return value.
 200   \remark
 201                  res[7:0]   = val1[7:0]   + val2[7:0]        \n
 202                  res[15:8]  = val1[15:8]  + val2[15:8]       \n
 203                  res[23:16] = val1[23:16] + val2[23:16]      \n
 204                  res[31:24] = val1[31:24] + val2[31:24]
 205  */
 206 __ALWAYS_INLINE uint32_t __UADD8(uint32_t x, uint32_t y)
 207 {
 208     int32_t r, s, t, u;
 209
 210     r = (((x << 24) >> 24) + ((y << 24) >> 24)) & 0x000000FF;
 211     s = (((x << 16) >> 24) + ((y << 16) >> 24)) & 0x000000FF;
 212     t = (((x <<  8) >> 24) + ((y <<  8) >> 24)) & 0x000000FF;
 213     u = (((x) >> 24) + ((y) >> 24)) & 0x000000FF;
 214
 215     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 216 }
 217
 218 /**
 219   \brief   Quad 8-bit saturating subtract.
 220   \details This function enables you to perform four 8-bit integer subtractions,
 221            saturating the results to the 8-bit signed integer range -2^7 <= x <= 2^7 - 1.
 222   \param [in]    x   first four 8-bit summands.
 223   \param [in]    y   second four 8-bit summands.
 224   \return        the subtraction of the first byte of each operand in the first byte of the return value.\n
 225                  the subtraction of the second byte of each operand in the second byte of the return value.\n
 226                  the subtraction of the third byte of each operand in the third byte of the return value.\n
 227                  the subtraction of the fourth byte of each operand in the fourth byte of the return value.\n
 228                  The returned results are saturated to the 8-bit signed integer range -2^7 <= x <= 2^7 - 1.
 229   \remark
 230                  res[7:0]   = val1[7:0]   - val2[7:0]        \n
 231                  res[15:8]  = val1[15:8]  - val2[15:8]       \n
 232                  res[23:16] = val1[23:16] - val2[23:16]      \n
 233                  res[31:24] = val1[31:24] - val2[31:24]
 234  */
 235 __ALWAYS_INLINE uint32_t __QSUB8(uint32_t x, uint32_t y)
 236 {
 237     int32_t r, s, t, u;
 238
 239     r = __SSAT(((((int32_t)x << 24) >> 24) - (((int32_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
 240     s = __SSAT(((((int32_t)x << 16) >> 24) - (((int32_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
 241     t = __SSAT(((((int32_t)x <<  8) >> 24) - (((int32_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
 242     u = __SSAT(((((int32_t)x) >> 24) - (((int32_t)y) >> 24)), 8) & (int32_t)0x000000FF;
 243
 244     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 245 }
 246
 247 /**
 248   \brief   Quad 8-bit unsigned saturating subtraction.
 249   \details This function enables you to perform four unsigned 8-bit integer subtractions,
 250            saturating the results to the 8-bit unsigned integer range 0 < x < 2^8 - 1.
 251   \param [in]    x   first four 8-bit summands.
 252   \param [in]    y   second four 8-bit summands.
 253   \return        the subtraction of the first byte of each operand in the first byte of the return value.\n
 254                  the subtraction of the second byte of each operand in the second byte of the return value.\n
 255                  the subtraction of the third byte of each operand in the third byte of the return value.\n
 256                  the subtraction of the fourth byte of each operand in the fourth byte of the return value.\n
 257                  The returned results are saturated to the 8-bit unsigned integer range 0 <= x <= 2^8 - 1.
 258   \remark
 259                  res[7:0]   = val1[7:0]   - val2[7:0]        \n
 260                  res[15:8]  = val1[15:8]  - val2[15:8]       \n
 261                  res[23:16] = val1[23:16] - val2[23:16]      \n
 262                  res[31:24] = val1[31:24] - val2[31:24]
 263  */
 264 __ALWAYS_INLINE uint32_t __UQSUB8(uint32_t x, uint32_t y)
 265 {
 266     int32_t r, s, t, u;
 267
 268     r = __IUSAT((((x << 24) >> 24) - ((y << 24) >> 24)), 8) & 0x000000FF;
 269     s = __IUSAT((((x << 16) >> 24) - ((y << 16) >> 24)), 8) & 0x000000FF;
 270     t = __IUSAT((((x <<  8) >> 24) - ((y <<  8) >> 24)), 8) & 0x000000FF;
 271     u = __IUSAT((((x) >> 24) - ((y) >> 24)), 8) & 0x000000FF;
 272
 273     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 274 }
 275
 276 /**
 277   \brief   Quad 8-bit signed subtraction.
 278   \details This function enables you to perform four 8-bit signed integer subtractions.
 279   \param [in]    x  first four 8-bit operands of each subtraction.
 280   \param [in]    y  second four 8-bit operands of each subtraction.
 281   \return        the subtraction of the first bytes from each operand, in the first byte of the return value.\n
 282                  the subtraction of the second bytes of each operand, in the second byte of the return value.\n
 283                  the subtraction of the third bytes of each operand, in the third byte of the return value.\n
 284                  the subtraction of the fourth bytes of each operand, in the fourth byte of the return value.
 285   \remark
 286                  res[7:0]   = val1[7:0]   - val2[7:0]        \n
 287                  res[15:8]  = val1[15:8]  - val2[15:8]       \n
 288                  res[23:16] = val1[23:16] - val2[23:16]      \n
 289                  res[31:24] = val1[31:24] - val2[31:24]
 290  */
 291 __ALWAYS_INLINE uint32_t __SSUB8(uint32_t x, uint32_t y)
 292 {
 293     int32_t r, s, t, u;
 294
 295     r = ((((int32_t)x << 24) >> 24) - (((int32_t)y << 24) >> 24)) & (int32_t)0x000000FF;
 296     s = ((((int32_t)x << 16) >> 24) - (((int32_t)y << 16) >> 24)) & (int32_t)0x000000FF;
 297     t = ((((int32_t)x <<  8) >> 24) - (((int32_t)y <<  8) >> 24)) & (int32_t)0x000000FF;
 298     u = ((((int32_t)x) >> 24) - (((int32_t)y) >> 24)) & (int32_t)0x000000FF;
 299
 300     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 301 }
 302
 303 /**
 304   \brief   Quad 8-bit unsigned subtract.
 305   \details This function enables you to perform four 8-bit unsigned integer subtractions.
 306   \param [in]    x  first four 8-bit operands of each subtraction.
 307   \param [in]    y  second four 8-bit operands of each subtraction.
 308   \return        the subtraction of the first bytes from each operand, in the first byte of the return value.\n
 309                  the subtraction of the second bytes of each operand, in the second byte of the return value.\n
 310                  the subtraction of the third bytes of each operand, in the third byte of the return value.\n
 311                  the subtraction of the fourth bytes of each operand, in the fourth byte of the return value.
 312   \remark
 313                  res[7:0]   = val1[7:0]   - val2[7:0]        \n
 314                  res[15:8]  = val1[15:8]  - val2[15:8]       \n
 315                  res[23:16] = val1[23:16] - val2[23:16]      \n
 316                  res[31:24] = val1[31:24] - val2[31:24]
 317  */
 318 __ALWAYS_INLINE uint32_t __USUB8(uint32_t x, uint32_t y)
 319 {
 320     int32_t r, s, t, u;
 321
 322     r = (((x << 24) >> 24) - ((y << 24) >> 24)) & 0x000000FF;
 323     s = (((x << 16) >> 24) - ((y << 16) >> 24)) & 0x000000FF;
 324     t = (((x <<  8) >> 24) - ((y <<  8) >> 24)) & 0x000000FF;
 325     u = (((x) >> 24) - ((y) >> 24)) & 0x000000FF;
 326
 327     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 328 }
 329
 330 /**
 331   \brief   Unsigned sum of quad 8-bit unsigned absolute difference.
 332   \details This function enables you to perform four unsigned 8-bit subtractions, and add the absolute values
 333            of the differences together, returning the result as a single unsigned integer.
 334   \param [in]    x  first four 8-bit operands of each subtraction.
 335   \param [in]    y  second four 8-bit operands of each subtraction.
 336   \return        the subtraction of the first bytes from each operand, in the first byte of the return value.\n
 337                  the subtraction of the second bytes of each operand, in the second byte of the return value.\n
 338                  the subtraction of the third bytes of each operand, in the third byte of the return value.\n
 339                  the subtraction of the fourth bytes of each operand, in the fourth byte of the return value.\n
 340                  The sum is returned as a single unsigned integer.
 341   \remark
 342                  absdiff1   = val1[7:0]   - val2[7:0]        \n
 343                  absdiff2   = val1[15:8]  - val2[15:8]       \n
 344                  absdiff3   = val1[23:16] - val2[23:16]      \n
 345                  absdiff4   = val1[31:24] - val2[31:24]      \n
 346                  res[31:0]  = absdiff1 + absdiff2 + absdiff3 + absdiff4
 347  */
 348 __ALWAYS_INLINE uint32_t __USAD8(uint32_t x, uint32_t y)
 349 {
 350     int32_t r, s, t, u;
 351
 352     r = (((x << 24) >> 24) - ((y << 24) >> 24)) & 0x000000FF;
 353     s = (((x << 16) >> 24) - ((y << 16) >> 24)) & 0x000000FF;
 354     t = (((x <<  8) >> 24) - ((y <<  8) >> 24)) & 0x000000FF;
 355     u = (((x) >> 24) - ((y) >> 24)) & 0x000000FF;
 356
 357     return (u + t + s + r);
 358 }
 359
 360 /**
 361   \brief   Unsigned sum of quad 8-bit unsigned absolute difference with 32-bit accumulate.
 362   \details This function enables you to perform four unsigned 8-bit subtractions, and add the absolute values
 363            of the differences to a 32-bit accumulate operand.
 364   \param [in]    x  first four 8-bit operands of each subtraction.
 365   \param [in]    y  second four 8-bit operands of each subtraction.
 366   \param [in]  sum  accumulation value.
 367   \return        the sum of the absolute differences of the following bytes, added to the accumulation value:
 368                  the subtraction of the first bytes from each operand, in the first byte of the return value.\n
 369                  the subtraction of the second bytes of each operand, in the second byte of the return value.\n
 370                  the subtraction of the third bytes of each operand, in the third byte of the return value.\n
 371                  the subtraction of the fourth bytes of each operand, in the fourth byte of the return value.
 372   \remark
 373                  absdiff1 = val1[7:0]   - val2[7:0]        \n
 374                  absdiff2 = val1[15:8]  - val2[15:8]       \n
 375                  absdiff3 = val1[23:16] - val2[23:16]      \n
 376                  absdiff4 = val1[31:24] - val2[31:24]      \n
 377                  sum = absdiff1 + absdiff2 + absdiff3 + absdiff4 \n
 378                  res[31:0] = sum[31:0] + val3[31:0]
 379  */
 380 __ALWAYS_INLINE uint32_t __USADA8(uint32_t x, uint32_t y, uint32_t sum)
 381 {
 382     int32_t r, s, t, u;
 383
 384     r = (abs(((x << 24) >> 24) - ((y << 24) >> 24))) & 0x000000FF;
 385     s = (abs(((x << 16) >> 24) - ((y << 16) >> 24))) & 0x000000FF;
 386     t = (abs(((x <<  8) >> 24) - ((y <<  8) >> 24))) & 0x000000FF;
 387     u = (abs(((x) >> 24) - ((y) >> 24))) & 0x000000FF;
 388
 389     return (u + t + s + r + sum);
 390 }
 391
 392 /**
 393   \brief   Dual 16-bit saturating addition.
 394   \details This function enables you to perform two 16-bit integer arithmetic additions in parallel,
 395            saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 396   \param [in]    x   first two 16-bit summands.
 397   \param [in]    y   second two 16-bit summands.
 398   \return        the saturated addition of the low halfwords, in the low halfword of the return value.\n
 399                  the saturated addition of the high halfwords, in the high halfword of the return value.\n
 400                  The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 401   \remark
 402                  res[15:0]  = val1[15:0]  + val2[15:0]        \n
 403                  res[31:16] = val1[31:16] + val2[31:16]
 404  */
 405 __ALWAYS_INLINE uint32_t __QADD16(uint32_t x, uint32_t y)
 406 {
 407     int32_t r = 0, s = 0;
 408
 409     r = __SSAT(((((int32_t)x << 16) >> 16) + (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
 410     s = __SSAT(((((int32_t)x) >> 16) + (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
 411
 412     return ((uint32_t)((s << 16) | (r)));
 413 }
 414
 415 /**
 416   \brief   Dual 16-bit unsigned saturating addition.
 417   \details This function enables you to perform two unsigned 16-bit integer additions, saturating
 418            the results to the 16-bit unsigned integer range 0 < x < 2^16 - 1.
 419   \param [in]    x   first two 16-bit summands.
 420   \param [in]    y   second two 16-bit summands.
 421   \return        the saturated addition of the low halfwords, in the low halfword of the return value.\n
 422                  the saturated addition of the high halfwords, in the high halfword of the return value.\n
 423                  The results are saturated to the 16-bit unsigned integer range 0 < x < 2^16 - 1.
 424   \remark
 425                  res[15:0]  = val1[15:0]  + val2[15:0]        \n
 426                  res[31:16] = val1[31:16] + val2[31:16]
 427  */
 428 __ALWAYS_INLINE uint32_t __UQADD16(uint32_t x, uint32_t y)
 429 {
 430     int32_t r = 0, s = 0;
 431
 432     r = __IUSAT((((x << 16) >> 16) + ((y << 16) >> 16)), 16) & 0x0000FFFF;
 433     s = __IUSAT((((x) >> 16) + ((y) >> 16)), 16) & 0x0000FFFF;
 434
 435     return ((s << 16) | (r));
 436 }
 437
 438 /**
 439   \brief   Dual 16-bit signed addition.
 440   \details This function enables you to perform two 16-bit signed integer additions.
 441   \param [in]    x   first two 16-bit summands.
 442   \param [in]    y   second two 16-bit summands.
 443   \return        the addition of the low halfwords in the low halfword of the return value.\n
 444                  the addition of the high halfwords in the high halfword of the return value.
 445   \remark
 446                  res[15:0]  = val1[15:0]  + val2[15:0]        \n
 447                  res[31:16] = val1[31:16] + val2[31:16]
 448  */
 449 __ALWAYS_INLINE uint32_t __SADD16(uint32_t x, uint32_t y)
 450 {
 451     int32_t r = 0, s = 0;
 452
 453     r = ((((int32_t)x << 16) >> 16) + (((int32_t)y << 16) >> 16)) & (int32_t)0x0000FFFF;
 454     s = ((((int32_t)x) >> 16) + (((int32_t)y) >> 16)) & (int32_t)0x0000FFFF;
 455
 456     return ((uint32_t)((s << 16) | (r)));
 457 }
 458
 459 /**
 460   \brief   Dual 16-bit unsigned addition
 461   \details This function enables you to perform two 16-bit unsigned integer additions.
 462   \param [in]    x   first two 16-bit summands for each addition.
 463   \param [in]    y   second two 16-bit summands for each addition.
 464   \return        the addition of the low halfwords in the low halfword of the return value.\n
 465                  the addition of the high halfwords in the high halfword of the return value.
 466   \remark
 467                  res[15:0]  = val1[15:0]  + val2[15:0]        \n
 468                  res[31:16] = val1[31:16] + val2[31:16]
 469  */
 470 __ALWAYS_INLINE uint32_t __UADD16(uint32_t x, uint32_t y)
 471 {
 472     int32_t r = 0, s = 0;
 473
 474     r = (((x << 16) >> 16) + ((y << 16) >> 16)) & 0x0000FFFF;
 475     s = (((x) >> 16) + ((y) >> 16)) & 0x0000FFFF;
 476
 477     return ((s << 16) | (r));
 478 }
 479
 480
 481 /**
 482   \brief   Dual 16-bit signed addition with halved results.
 483   \details This function enables you to perform two signed 16-bit integer additions, halving the results.
 484   \param [in]    x   first two 16-bit summands.
 485   \param [in]    y   second two 16-bit summands.
 486   \return        the halved addition of the low halfwords, in the low halfword of the return value.\n
 487                  the halved addition of the high halfwords, in the high halfword of the return value.
 488   \remark
 489                  res[15:0]  = (val1[15:0]  + val2[15:0]) >> 1        \n
 490                  res[31:16] = (val1[31:16] + val2[31:16]) >> 1
 491  */
 492 __ALWAYS_INLINE uint32_t __SHADD16(uint32_t x, uint32_t y)
 493 {
 494     int32_t r, s;
 495
 496     r = (((((int32_t)x << 16) >> 16) + (((int32_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 497     s = (((((int32_t)x) >> 16) + (((int32_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 498
 499     return ((uint32_t)((s << 16) | (r)));
 500 }
 501
 502 /**
 503   \brief   Dual 16-bit unsigned addition with halved results.
 504   \details This function enables you to perform two unsigned 16-bit integer additions, halving the results.
 505   \param [in]    x   first two 16-bit summands.
 506   \param [in]    y   second two 16-bit summands.
 507   \return        the halved addition of the low halfwords, in the low halfword of the return value.\n
 508                  the halved addition of the high halfwords, in the high halfword of the return value.
 509   \remark
 510                  res[15:0]  = (val1[15:0]  + val2[15:0]) >> 1        \n
 511                  res[31:16] = (val1[31:16] + val2[31:16]) >> 1
 512  */
 513 __ALWAYS_INLINE uint32_t __UHADD16(uint32_t x, uint32_t y)
 514 {
 515     int32_t r, s;
 516
 517     r = ((((x << 16) >> 16) + ((y << 16) >> 16)) >> 1) & 0x0000FFFF;
 518     s = ((((x) >> 16) + ((y) >> 16)) >> 1) & 0x0000FFFF;
 519
 520     return ((s << 16) | (r));
 521 }
 522
 523 /**
 524   \brief   Quad 8-bit signed addition with halved results.
 525   \details This function enables you to perform four signed 8-bit integer additions, halving the results.
 526   \param [in]    x   first four 8-bit summands.
 527   \param [in]    y   second four 8-bit summands.
 528   \return        the halved addition of the first bytes from each operand, in the first byte of the return value.\n
 529                  the halved addition of the second bytes from each operand, in the second byte of the return value.\n
 530                  the halved addition of the third bytes from each operand, in the third byte of the return value.\n
 531                  the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
 532   \remark
 533                  res[7:0]   = (val1[7:0]   + val2[7:0]  ) >> 1    \n
 534                  res[15:8]  = (val1[15:8]  + val2[15:8] ) >> 1    \n
 535                  res[23:16] = (val1[23:16] + val2[23:16]) >> 1    \n
 536                  res[31:24] = (val1[31:24] + val2[31:24]) >> 1
 537  */
 538 __ALWAYS_INLINE uint32_t __SHADD8(uint32_t x, uint32_t y)
 539 {
 540     int32_t r, s, t, u;
 541
 542     r = (((((int32_t)x << 24) >> 24) + (((int32_t)y << 24) >> 24)) >> 1) & (int32_t)0x000000FF;
 543     s = (((((int32_t)x << 16) >> 24) + (((int32_t)y << 16) >> 24)) >> 1) & (int32_t)0x000000FF;
 544     t = (((((int32_t)x <<  8) >> 24) + (((int32_t)y <<  8) >> 24)) >> 1) & (int32_t)0x000000FF;
 545     u = (((((int32_t)x) >> 24) + (((int32_t)y) >> 24)) >> 1) & (int32_t)0x000000FF;
 546
 547     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 548 }
 549
 550 /**
 551   \brief   Quad 8-bit unsigned addition with halved results.
 552   \details This function enables you to perform four unsigned 8-bit integer additions, halving the results.
 553   \param [in]    x   first four 8-bit summands.
 554   \param [in]    y   second four 8-bit summands.
 555   \return        the halved addition of the first bytes from each operand, in the first byte of the return value.\n
 556                  the halved addition of the second bytes from each operand, in the second byte of the return value.\n
 557                  the halved addition of the third bytes from each operand, in the third byte of the return value.\n
 558                  the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
 559   \remark
 560                  res[7:0]   = (val1[7:0]   + val2[7:0]  ) >> 1    \n
 561                  res[15:8]  = (val1[15:8]  + val2[15:8] ) >> 1    \n
 562                  res[23:16] = (val1[23:16] + val2[23:16]) >> 1    \n
 563                  res[31:24] = (val1[31:24] + val2[31:24]) >> 1
 564  */
 565 __ALWAYS_INLINE uint32_t __UHADD8(uint32_t x, uint32_t y)
 566 {
 567     int32_t r, s, t, u;
 568
 569     r = ((((x << 24) >> 24) + ((y << 24) >> 24)) >> 1) & 0x000000FF;
 570     s = ((((x << 16) >> 24) + ((y << 16) >> 24)) >> 1) & 0x000000FF;
 571     t = ((((x <<  8) >> 24) + ((y <<  8) >> 24)) >> 1) & 0x000000FF;
 572     u = ((((x) >> 24) + ((y) >> 24)) >> 1) & 0x000000FF;
 573
 574     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 575 }
 576
 577 /**
 578   \brief   Dual 16-bit saturating subtract.
 579   \details This function enables you to perform two 16-bit integer subtractions in parallel,
 580            saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 581   \param [in]    x   first two 16-bit summands.
 582   \param [in]    y   second two 16-bit summands.
 583   \return        the saturated subtraction of the low halfwords, in the low halfword of the return value.\n
 584                  the saturated subtraction of the high halfwords, in the high halfword of the return value.\n
 585                  The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 586   \remark
 587                  res[15:0]  = val1[15:0]  - val2[15:0]        \n
 588                  res[31:16] = val1[31:16] - val2[31:16]
 589  */
 590 __ALWAYS_INLINE uint32_t __QSUB16(uint32_t x, uint32_t y)
 591 {
 592     int32_t r, s;
 593
 594     r = __SSAT(((((int32_t)x << 16) >> 16) - (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
 595     s = __SSAT(((((int32_t)x) >> 16) - (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
 596
 597     return ((uint32_t)((s << 16) | (r)));
 598 }
 599
 600 /**
 601   \brief   Dual 16-bit unsigned saturating subtraction.
 602   \details This function enables you to perform two unsigned 16-bit integer subtractions,
 603            saturating the results to the 16-bit unsigned integer range 0 < x < 2^16 - 1.
 604   \param [in]    x   first two 16-bit operands for each subtraction.
 605   \param [in]    y   second two 16-bit operands for each subtraction.
 606   \return        the saturated subtraction of the low halfwords, in the low halfword of the return value.\n
 607                  the saturated subtraction of the high halfwords, in the high halfword of the return value.\n
 608                  The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 609   \remark
 610                  res[15:0]  = val1[15:0]  - val2[15:0]        \n
 611                  res[31:16] = val1[31:16] - val2[31:16]
 612  */
 613 __ALWAYS_INLINE uint32_t __UQSUB16(uint32_t x, uint32_t y)
 614 {
 615     int32_t r, s;
 616
 617     r = __IUSAT((((x << 16) >> 16) - ((y << 16) >> 16)), 16) & 0x0000FFFF;
 618     s = __IUSAT((((x) >> 16) - ((y) >> 16)), 16) & 0x0000FFFF;
 619
 620     return ((s << 16) | (r));
 621 }
 622
 623 /**
 624   \brief   Dual 16-bit signed subtraction.
 625   \details This function enables you to perform two 16-bit signed integer subtractions.
 626   \param [in]    x   first two 16-bit operands of each subtraction.
 627   \param [in]    y   second two 16-bit operands of each subtraction.
 628   \return        the subtraction of the low halfword in the second operand from the low
 629                  halfword in the first operand, in the low halfword of the return value. \n
 630                  the subtraction of the high halfword in the second operand from the high
 631                  halfword in the first operand, in the high halfword of the return value.
 632   \remark
 633                  res[15:0]  = val1[15:0]  - val2[15:0]        \n
 634                  res[31:16] = val1[31:16] - val2[31:16]
 635  */
 636 __ALWAYS_INLINE uint32_t __SSUB16(uint32_t x, uint32_t y)
 637 {
 638     int32_t r, s;
 639
 640     r = ((((int32_t)x << 16) >> 16) - (((int32_t)y << 16) >> 16)) & (int32_t)0x0000FFFF;
 641     s = ((((int32_t)x) >> 16) - (((int32_t)y) >> 16)) & (int32_t)0x0000FFFF;
 642
 643     return ((uint32_t)((s << 16) | (r)));
 644 }
 645
 646 /**
 647   \brief   Dual 16-bit unsigned subtract.
 648   \details This function enables you to perform two 16-bit unsigned integer subtractions.
 649   \param [in]    x   first two 16-bit operands of each subtraction.
 650   \param [in]    y   second two 16-bit operands of each subtraction.
 651   \return        the subtraction of the low halfword in the second operand from the low
 652                  halfword in the first operand, in the low halfword of the return value. \n
 653                  the subtraction of the high halfword in the second operand from the high
 654                  halfword in the first operand, in the high halfword of the return value.
 655   \remark
 656                  res[15:0]  = val1[15:0]  - val2[15:0]        \n
 657                  res[31:16] = val1[31:16] - val2[31:16]
 658  */
 659 __ALWAYS_INLINE uint32_t __USUB16(uint32_t x, uint32_t y)
 660 {
 661     int32_t r, s;
 662
 663     r = (((x << 16) >> 16) - ((y << 16) >> 16)) & 0x0000FFFF;
 664     s = (((x) >> 16) - ((y) >> 16)) & 0x0000FFFF;
 665
 666     return ((s << 16) | (r));
 667 }
 668
 669 /**
 670   \brief   Dual 16-bit signed subtraction with halved results.
 671   \details This function enables you to perform two signed 16-bit integer subtractions, halving the results.
 672   \param [in]    x   first two 16-bit summands.
 673   \param [in]    y   second two 16-bit summands.
 674   \return        the halved subtraction of the low halfwords, in the low halfword of the return value.\n
 675                  the halved subtraction of the high halfwords, in the high halfword of the return value.
 676   \remark
 677                  res[15:0]  = (val1[15:0]  - val2[15:0]) >> 1        \n
 678                  res[31:16] = (val1[31:16] - val2[31:16]) >> 1
 679  */
 680 __ALWAYS_INLINE uint32_t __SHSUB16(uint32_t x, uint32_t y)
 681 {
 682     int32_t r, s;
 683
 684     r = (((((int32_t)x << 16) >> 16) - (((int32_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 685     s = (((((int32_t)x) >> 16) - (((int32_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 686
 687     return ((uint32_t)((s << 16) | (r)));
 688 }
 689
 690 /**
 691   \brief   Dual 16-bit unsigned subtraction with halved results.
 692   \details This function enables you to perform two unsigned 16-bit integer subtractions, halving the results.
 693   \param [in]    x   first two 16-bit summands.
 694   \param [in]    y   second two 16-bit summands.
 695   \return        the halved subtraction of the low halfwords, in the low halfword of the return value.\n
 696                  the halved subtraction of the high halfwords, in the high halfword of the return value.
 697   \remark
 698                  res[15:0]  = (val1[15:0]  - val2[15:0]) >> 1        \n
 699                  res[31:16] = (val1[31:16] - val2[31:16]) >> 1
 700  */
 701 __ALWAYS_INLINE uint32_t __UHSUB16(uint32_t x, uint32_t y)
 702 {
 703     int32_t r, s;
 704
 705     r = ((((x << 16) >> 16) - ((y << 16) >> 16)) >> 1) & 0x0000FFFF;
 706     s = ((((x) >> 16) - ((y) >> 16)) >> 1) & 0x0000FFFF;
 707
 708     return ((s << 16) | (r));
 709 }
 710
 711 /**
 712   \brief   Quad 8-bit signed addition with halved results.
 713   \details This function enables you to perform four signed 8-bit integer subtractions, halving the results.
 714   \param [in]    x   first four 8-bit summands.
 715   \param [in]    y   second four 8-bit summands.
 716   \return        the halved subtraction of the first bytes from each operand, in the first byte of the return value.\n
 717                  the halved subtraction of the second bytes from each operand, in the second byte of the return value.\n
 718                  the halved subtraction of the third bytes from each operand, in the third byte of the return value.\n
 719                  the halved subtraction of the fourth bytes from each operand, in the fourth byte of the return value.
 720   \remark
 721                  res[7:0]   = (val1[7:0]   - val2[7:0]  ) >> 1    \n
 722                  res[15:8]  = (val1[15:8]  - val2[15:8] ) >> 1    \n
 723                  res[23:16] = (val1[23:16] - val2[23:16]) >> 1    \n
 724                  res[31:24] = (val1[31:24] - val2[31:24]) >> 1
 725  */
 726 __ALWAYS_INLINE uint32_t __SHSUB8(uint32_t x, uint32_t y)
 727 {
 728     int32_t r, s, t, u;
 729
 730     r = (((((int32_t)x << 24) >> 24) - (((int32_t)y << 24) >> 24)) >> 1) & (int32_t)0x000000FF;
 731     s = (((((int32_t)x << 16) >> 24) - (((int32_t)y << 16) >> 24)) >> 1) & (int32_t)0x000000FF;
 732     t = (((((int32_t)x <<  8) >> 24) - (((int32_t)y <<  8) >> 24)) >> 1) & (int32_t)0x000000FF;
 733     u = (((((int32_t)x) >> 24) - (((int32_t)y) >> 24)) >> 1) & (int32_t)0x000000FF;
 734
 735     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r)));
 736 }
 737
 738 /**
 739   \brief   Quad 8-bit unsigned subtraction with halved results.
 740   \details This function enables you to perform four unsigned 8-bit integer subtractions, halving the results.
 741   \param [in]    x   first four 8-bit summands.
 742   \param [in]    y   second four 8-bit summands.
 743   \return        the halved subtraction of the first bytes from each operand, in the first byte of the return value.\n
 744                  the halved subtraction of the second bytes from each operand, in the second byte of the return value.\n
 745                  the halved subtraction of the third bytes from each operand, in the third byte of the return value.\n
 746                  the halved subtraction of the fourth bytes from each operand, in the fourth byte of the return value.
 747   \remark
 748                  res[7:0]   = (val1[7:0]   - val2[7:0]  ) >> 1    \n
 749                  res[15:8]  = (val1[15:8]  - val2[15:8] ) >> 1    \n
 750                  res[23:16] = (val1[23:16] - val2[23:16]) >> 1    \n
 751                  res[31:24] = (val1[31:24] - val2[31:24]) >> 1
 752  */
 753 __ALWAYS_INLINE uint32_t __UHSUB8(uint32_t x, uint32_t y)
 754 {
 755     int32_t r, s, t, u;
 756
 757     r = ((((x << 24) >> 24) - ((y << 24) >> 24)) >> 1) & 0x000000FF;
 758     s = ((((x << 16) >> 24) - ((y << 16) >> 24)) >> 1) & 0x000000FF;
 759     t = ((((x <<  8) >> 24) - ((y <<  8) >> 24)) >> 1) & 0x000000FF;
 760     u = ((((x) >> 24) - ((y) >> 24)) >> 1) & 0x000000FF;
 761
 762     return ((u << 24) | (t << 16) | (s <<  8) | (r));
 763 }
 764
 765 /**
 766   \brief   Dual 16-bit add and subtract with exchange.
 767   \details This function enables you to exchange the halfwords of the one operand,
 768            then add the high halfwords and subtract the low halfwords,
 769            saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 770   \param [in]    x   first operand for the subtraction in the low halfword,
 771                      and the first operand for the addition in the high halfword.
 772   \param [in]    y   second operand for the subtraction in the high halfword,
 773                      and the second operand for the addition in the low halfword.
 774   \return        the saturated subtraction of the high halfword in the second operand from the
 775                  low halfword in the first operand, in the low halfword of the return value.\n
 776                  the saturated addition of the high halfword in the first operand and the
 777                  low halfword in the second operand, in the high halfword of the return value.\n
 778                  The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 779   \remark
 780                  res[15:0]  = val1[15:0]  - val2[31:16]        \n
 781                  res[31:16] = val1[31:16] + val2[15:0]
 782  */
 783 __ALWAYS_INLINE uint32_t __QASX(uint32_t x, uint32_t y)
 784 {
 785     int32_t r, s;
 786
 787     r = __SSAT(((((int32_t)x << 16) >> 16) - (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
 788     s = __SSAT(((((int32_t)x) >> 16) + (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
 789
 790     return ((uint32_t)((s << 16) | (r)));
 791 }
 792
 793 /**
 794   \brief   Dual 16-bit unsigned saturating addition and subtraction with exchange.
 795   \details This function enables you to exchange the halfwords of the second operand and
 796            perform one unsigned 16-bit integer addition and one unsigned 16-bit subtraction,
 797            saturating the results to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
 798   \param [in]    x   first operand for the subtraction in the low halfword,
 799                      and the first operand for the addition in the high halfword.
 800   \param [in]    y   second operand for the subtraction in the high halfword,
 801                      and the second operand for the addition in the low halfword.
 802   \return        the saturated subtraction of the high halfword in the second operand from the
 803                  low halfword in the first operand, in the low halfword of the return value.\n
 804                  the saturated addition of the high halfword in the first operand and the
 805                  low halfword in the second operand, in the high halfword of the return value.\n
 806                  The returned results are saturated to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
 807   \remark
 808                  res[15:0]  = val1[15:0]  - val2[31:16]        \n
 809                  res[31:16] = val1[31:16] + val2[15:0]
 810  */
 811 __ALWAYS_INLINE uint32_t __UQASX(uint32_t x, uint32_t y)
 812 {
 813     int32_t r, s;
 814
 815     r = __IUSAT((((x << 16) >> 16) - ((y) >> 16)), 16) & 0x0000FFFF;
 816     s = __IUSAT((((x) >> 16) + ((y << 16) >> 16)), 16) & 0x0000FFFF;
 817
 818     return ((s << 16) | (r));
 819 }
 820
 821 /**
 822   \brief   Dual 16-bit addition and subtraction with exchange.
 823   \details It enables you to exchange the halfwords of the second operand, add the high halfwords
 824            and subtract the low halfwords.
 825   \param [in]    x   first operand for the subtraction in the low halfword,
 826                      and the first operand for the addition in the high halfword.
 827   \param [in]    y   second operand for the subtraction in the high halfword,
 828                      and the second operand for the addition in the low halfword.
 829   \return        the subtraction of the high halfword in the second operand from the
 830                  low halfword in the first operand, in the low halfword of the return value.\n
 831                  the addition of the high halfword in the first operand and the
 832                  low halfword in the second operand, in the high halfword of the return value.
 833   \remark
 834                  res[15:0]  = val1[15:0]  - val2[31:16]        \n
 835                  res[31:16] = val1[31:16] + val2[15:0]
 836  */
 837 __ALWAYS_INLINE uint32_t __SASX(uint32_t x, uint32_t y)
 838 {
 839     int32_t r, s;
 840
 841     r = ((((int32_t)x << 16) >> 16) - (((int32_t)y) >> 16)) & (int32_t)0x0000FFFF;
 842     s = ((((int32_t)x) >> 16) + (((int32_t)y << 16) >> 16)) & (int32_t)0x0000FFFF;
 843
 844     return ((uint32_t)((s << 16) | (r)));
 845 }
 846
 847 /**
 848   \brief   Dual 16-bit unsigned addition and subtraction with exchange.
 849   \details This function enables you to exchange the two halfwords of the second operand,
 850            add the high halfwords and subtract the low halfwords.
 851   \param [in]    x   first operand for the subtraction in the low halfword,
 852                      and the first operand for the addition in the high halfword.
 853   \param [in]    y   second operand for the subtraction in the high halfword,
 854                      and the second operand for the addition in the low halfword.
 855   \return        the subtraction of the high halfword in the second operand from the
 856                  low halfword in the first operand, in the low halfword of the return value.\n
 857                  the addition of the high halfword in the first operand and the
 858                  low halfword in the second operand, in the high halfword of the return value.
 859   \remark
 860                  res[15:0]  = val1[15:0]  - val2[31:16]        \n
 861                  res[31:16] = val1[31:16] + val2[15:0]
 862  */
 863 __ALWAYS_INLINE uint32_t __UASX(uint32_t x, uint32_t y)
 864 {
 865     int32_t r, s;
 866
 867     r = (((x << 16) >> 16) - ((y) >> 16)) & 0x0000FFFF;
 868     s = (((x) >> 16) + ((y << 16) >> 16)) & 0x0000FFFF;
 869
 870     return ((s << 16) | (r));
 871 }
 872
 873 /**
 874   \brief   Dual 16-bit signed addition and subtraction with halved results.
 875   \details This function enables you to exchange the two halfwords of one operand, perform one
 876            signed 16-bit integer addition and one signed 16-bit subtraction, and halve the results.
 877   \param [in]    x   first 16-bit operands.
 878   \param [in]    y   second 16-bit operands.
 879   \return        the halved subtraction of the high halfword in the second operand from the
 880                  low halfword in the first operand, in the low halfword of the return value.\n
 881                  the halved addition of the low halfword in the second operand from the high
 882                  halfword in the first operand, in the high halfword of the return value.
 883   \remark
 884                  res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1        \n
 885                  res[31:16] = (val1[31:16] + val2[15:0]) >> 1
 886  */
 887 __ALWAYS_INLINE uint32_t __SHASX(uint32_t x, uint32_t y)
 888 {
 889     int32_t r, s;
 890
 891     r = (((((int32_t)x << 16) >> 16) - (((int32_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 892     s = (((((int32_t)x) >> 16) + (((int32_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
 893
 894     return ((uint32_t)((s << 16) | (r)));
 895 }
 896
 897 /**
 898   \brief   Dual 16-bit unsigned addition and subtraction with halved results and exchange.
 899   \details This function enables you to exchange the halfwords of the second operand,
 900            add the high halfwords and subtract the low halfwords, halving the results.
 901   \param [in]    x   first operand for the subtraction in the low halfword, and
 902                      the first operand for the addition in the high halfword.
 903   \param [in]    y   second operand for the subtraction in the high halfword, and
 904                      the second operand for the addition in the low halfword.
 905   \return        the halved subtraction of the high halfword in the second operand from the
 906                  low halfword in the first operand, in the low halfword of the return value.\n
 907                  the halved addition of the low halfword in the second operand from the high
 908                  halfword in the first operand, in the high halfword of the return value.
 909   \remark
 910                  res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1        \n
 911                  res[31:16] = (val1[31:16] + val2[15:0]) >> 1
 912  */
 913 __ALWAYS_INLINE uint32_t __UHASX(uint32_t x, uint32_t y)
 914 {
 915     int32_t r, s;
 916
 917     r = ((((x << 16) >> 16) - ((y) >> 16)) >> 1) & 0x0000FFFF;
 918     s = ((((x) >> 16) + ((y << 16) >> 16)) >> 1) & 0x0000FFFF;
 919
 920     return ((s << 16) | (r));
 921 }
 922
 923 /**
 924   \brief   Dual 16-bit subtract and add with exchange.
 925   \details This function enables you to exchange the halfwords of one operand,
 926            then subtract the high halfwords and add the low halfwords,
 927            saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 928   \param [in]    x   first operand for the addition in the low halfword,
 929                      and the first operand for the subtraction in the high halfword.
 930   \param [in]    y   second operand for the addition in the high halfword,
 931                      and the second operand for the subtraction in the low halfword.
 932   \return        the saturated addition of the low halfword of the first operand and the high
 933                  halfword of the second operand, in the low halfword of the return value.\n
 934                  the saturated subtraction of the low halfword of the second operand from the
 935                  high halfword of the first operand, in the high halfword of the return value.\n
 936                  The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1.
 937   \remark
 938                  res[15:0]  = val1[15:0]  + val2[31:16]        \n
 939                  res[31:16] = val1[31:16] - val2[15:0]
 940  */
 941 __ALWAYS_INLINE uint32_t __QSAX(uint32_t x, uint32_t y)
 942 {
 943     int32_t r, s;
 944
 945     r = __SSAT(((((int32_t)x << 16) >> 16) + (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
 946     s = __SSAT(((((int32_t)x) >> 16) - (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
 947
 948     return ((uint32_t)((s << 16) | (r)));
 949 }
 950
 951 /**
 952   \brief   Dual 16-bit unsigned saturating subtraction and addition with exchange.
 953   \details This function enables you to exchange the halfwords of the second operand and perform
 954            one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating
 955            the results to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
 956   \param [in]    x   first operand for the addition in the low halfword,
 957                      and the first operand for the subtraction in the high halfword.
 958   \param [in]    y   second operand for the addition in the high halfword,
 959                      and the second operand for the subtraction in the low halfword.
 960   \return        the saturated addition of the low halfword of the first operand and the high
 961                  halfword of the second operand, in the low halfword of the return value.\n
 962                  the saturated subtraction of the low halfword of the second operand from the
 963                  high halfword of the first operand, in the high halfword of the return value.\n
 964                  The returned results are saturated to the 16-bit unsigned integer range 0 <= x <= 2^16 - 1.
 965   \remark
 966                  res[15:0]  = val1[15:0]  + val2[31:16]        \n
 967                  res[31:16] = val1[31:16] - val2[15:0]
 968  */
 969 __ALWAYS_INLINE uint32_t __UQSAX(uint32_t x, uint32_t y)
 970 {
 971     int32_t r, s;
 972
 973     r = __IUSAT((((x << 16) >> 16) + ((y) >> 16)), 16) & 0x0000FFFF;
 974     s = __IUSAT((((x) >> 16) - ((y << 16) >> 16)), 16) & 0x0000FFFF;
 975
 976     return ((s << 16) | (r));
 977 }
 978
 979 /**
 980   \brief   Dual 16-bit unsigned subtract and add with exchange.
 981   \details This function enables you to exchange the halfwords of the second operand,
 982            subtract the high halfwords and add the low halfwords.
 983   \param [in]    x   first operand for the addition in the low halfword,
 984                      and the first operand for the subtraction in the high halfword.
 985   \param [in]    y   second operand for the addition in the high halfword,
 986                      and the second operand for the subtraction in the low halfword.
 987   \return        the addition of the low halfword of the first operand and the high
 988                  halfword of the second operand, in the low halfword of the return value.\n
 989                  the subtraction of the low halfword of the second operand from the
 990                  high halfword of the first operand, in the high halfword of the return value.\n
 991   \remark
 992                  res[15:0]  = val1[15:0]  + val2[31:16]        \n
 993                  res[31:16] = val1[31:16] - val2[15:0]
 994  */
 995 __ALWAYS_INLINE uint32_t __USAX(uint32_t x, uint32_t y)
 996 {
 997     int32_t r, s;
 998
 999     r = (((x << 16) >> 16) + ((y) >> 16)) & 0x0000FFFF;
1000     s = (((x) >> 16) - ((y << 16) >> 16)) & 0x0000FFFF;
1001
1002     return ((s << 16) | (r));
1003 }
1004
/**
  \brief   Dual 16-bit signed subtraction and addition with exchange.
  \details This function enables you to exchange the two halfwords of the second operand and perform one
           16-bit integer subtraction and one 16-bit addition.
           Results are not saturated; each one wraps modulo 2^16.
  \param [in]    x   first operand for the addition in the low halfword, and the first operand
                     for the subtraction in the high halfword.
  \param [in]    y   second operand for the addition in the high halfword, and the second
                     operand for the subtraction in the low halfword.
  \return        the addition of the low halfword of the first operand and the high
                 halfword of the second operand, in the low halfword of the return value.\n
                 the subtraction of the low halfword of the second operand from the
                 high halfword of the first operand, in the high halfword of the return value.
  \remark
                 res[15:0]  = x[15:0]  + y[31:16]        \n
                 res[31:16] = x[31:16] - y[15:0]
 */
__ALWAYS_INLINE uint32_t __SSAX(uint32_t x, uint32_t y)
{
    /* Sign-extend each halfword by flipping its sign bit and subtracting the
       bit's weight; this never left-shifts a signed value. */
    const int32_t x_lo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t x_hi = (int32_t)((x >> 16) ^ 0x8000u) - 0x8000;
    const int32_t y_lo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t y_hi = (int32_t)((y >> 16) ^ 0x8000u) - 0x8000;

    /* Converting to uint32_t keeps the two's-complement bit pattern of the lane. */
    const uint32_t lo = (uint32_t)(x_lo + y_hi) & 0xFFFFu;
    const uint32_t hi = (uint32_t)(x_hi - y_lo) & 0xFFFFu;

    return ((hi << 16) | (lo));
}
1030
1031
/**
  \brief   Dual 16-bit signed subtraction and addition with halved results.
  \details This function enables you to exchange the two halfwords of the second operand, perform one signed
           16-bit integer subtraction and one signed 16-bit addition, and halve the results.
           Halving rounds towards negative infinity.
  \param [in]    x   first 16-bit operands.
  \param [in]    y   second 16-bit operands.
  \return        the halved addition of the low halfword in the first operand and the
                 high halfword in the second operand, in the low halfword of the return value.\n
                 the halved subtraction of the low halfword in the second operand from the
                 high halfword in the first operand, in the high halfword of the return value.
  \remark
                 res[15:0]  = (x[15:0]  + y[31:16]) >> 1        \n
                 res[31:16] = (x[31:16] - y[15:0]) >> 1
 */
__ALWAYS_INLINE uint32_t __SHSAX(uint32_t x, uint32_t y)
{
    /* Sign-extend each halfword by flipping its sign bit and subtracting the
       bit's weight; this never left-shifts a signed value. */
    const int32_t x_lo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t x_hi = (int32_t)((x >> 16) ^ 0x8000u) - 0x8000;
    const int32_t y_lo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t y_hi = (int32_t)((y >> 16) ^ 0x8000u) - 0x8000;

    /* The 17-bit result is sign-extended to 32 bits, so bits [16:1] of its
       unsigned image are exactly the arithmetically halved 16-bit result. */
    const uint32_t lo = ((uint32_t)(x_lo + y_hi) >> 1) & 0xFFFFu;
    const uint32_t hi = ((uint32_t)(x_hi - y_lo) >> 1) & 0xFFFFu;

    return ((hi << 16) | (lo));
}
1055
/**
  \brief   Dual 16-bit unsigned subtraction and addition with halved results and exchange.
  \details This function enables you to exchange the halfwords of the second operand,
           subtract the high halfwords and add the low halfwords, halving the results.
  \param [in]    x   first operand for the addition in the low halfword, and
                     the first operand for the subtraction in the high halfword.
  \param [in]    y   second operand for the addition in the high halfword, and
                     the second operand for the subtraction in the low halfword.
  \return        the halved addition of the low halfword in the first operand and the
                 high halfword in the second operand, in the low halfword of the return value.\n
                 the halved subtraction of the low halfword in the second operand from the
                 high halfword in the first operand, in the high halfword of the return value.
  \remark
                 res[15:0]  = (x[15:0]  + y[31:16]) >> 1        \n
                 res[31:16] = (x[31:16] - y[15:0]) >> 1
 */
__ALWAYS_INLINE uint32_t __UHSAX(uint32_t x, uint32_t y)
{
    /* Results are formed modulo 2^32, so bits [16:1] hold the halved 17-bit
       value even when the subtraction borrows.
       Lane results are held unsigned so that placing a high lane with bit 15
       set into the top halfword cannot overflow a signed 32-bit type. */
    const uint32_t lo = (((x & 0xFFFFu) + (y >> 16)) >> 1) & 0xFFFFu;
    const uint32_t hi = (((x >> 16) - (y & 0xFFFFu)) >> 1) & 0xFFFFu;

    return ((hi << 16) | (lo));
}
1081
/**
  \brief   Dual 16-bit signed multiply with exchange returning difference.
  \details This function enables you to perform two 16-bit signed multiplications, subtracting
           one of the products from the other. The halfwords of the second operand are exchanged
           before performing the arithmetic. This produces bottom * top and top * bottom multiplication.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \return        the difference of the products of the two 16-bit signed multiplications.
  \remark
                 p1 = x[15:0]  * y[31:16]       \n
                 p2 = x[31:16] * y[15:0]        \n
                 res[31:0] = p1 - p2
 */
__ALWAYS_INLINE uint32_t __SMUSDX(uint32_t x, uint32_t y)
{
    /* Sign-extend each halfword by flipping its sign bit and subtracting the
       bit's weight; this never left-shifts a signed value. */
    const int32_t x_lo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t x_hi = (int32_t)((x >> 16) ^ 0x8000u) - 0x8000;
    const int32_t y_lo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t y_hi = (int32_t)((y >> 16) ^ 0x8000u) - 0x8000;

    /* A product of two 16-bit values always fits in 32 bits. */
    const int32_t p1 = x_lo * y_hi;
    const int32_t p2 = x_hi * y_lo;

    /* Combine as unsigned so the result is the plain 32-bit two's-complement pattern. */
    return ((uint32_t)p1 - (uint32_t)p2);
}
1100
/**
  \brief   Sum of dual 16-bit signed multiply with exchange.
  \details This function enables you to perform two 16-bit signed multiplications with exchanged
           halfwords of the second operand, adding the products together.
           The sum wraps modulo 2^32 (only reachable when all four halfwords are 0x8000).
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \return        the sum of the products of the two 16-bit signed multiplications with exchanged halfwords of the second operand.
  \remark
                 p1 = x[15:0]  * y[31:16]       \n
                 p2 = x[31:16] * y[15:0]        \n
                 res[31:0] = p1 + p2
 */
__ALWAYS_INLINE uint32_t __SMUADX(uint32_t x, uint32_t y)
{
    /* Sign-extend each halfword by flipping its sign bit and subtracting the
       bit's weight; this never left-shifts a signed value. */
    const int32_t x_lo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t x_hi = (int32_t)((x >> 16) ^ 0x8000u) - 0x8000;
    const int32_t y_lo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t y_hi = (int32_t)((y >> 16) ^ 0x8000u) - 0x8000;

    /* A product of two 16-bit values always fits in 32 bits. */
    const int32_t p1 = x_lo * y_hi;
    const int32_t p2 = x_hi * y_lo;

    /* Add as unsigned: 2^30 + 2^30 would overflow a signed 32-bit sum. */
    return ((uint32_t)p1 + (uint32_t)p2);
}
1118
1119
/**
  \brief   Saturating add.
  \details This function enables you to obtain the saturating add of two integers.
           The sum is clamped to the 32-bit signed integer range -2^31 <= r <= 2^31 - 1.
  \param [in]    x   first summand of the saturating add operation.
  \param [in]    y   second summand of the saturating add operation.
  \return        the saturating addition of x and y.
  \remark
                 res[31:0] = SAT(x + y)
 */
__ALWAYS_INLINE int32_t __QADD(int32_t x, int32_t y)
{
    /* Add in 64 bits so the exact sum is always representable. Detecting the
       overflow after a 32-bit signed add is not reliable: that add is itself
       undefined on overflow and the compiler may remove the check. */
    int64_t tmp = (int64_t)x + (int64_t)y;

    if (tmp > 0x7fffffff) {
        tmp = 0x7fffffff;
    } else if (tmp < (-2147483647 - 1)) {
        tmp = -2147483647 - 1;
    }

    return (int32_t)tmp;
}
1149
/**
  \brief   Saturating subtract.
  \details This function enables you to obtain the saturating subtraction of two integers.
           The difference is clamped to the 32-bit signed integer range -2^31 <= r <= 2^31 - 1.
  \param [in]    x   minuend of the saturating subtract operation.
  \param [in]    y   subtrahend of the saturating subtract operation.
  \return        the saturating subtraction of y from x.
  \remark
                 res[31:0] = SAT(x - y)
 */
__ALWAYS_INLINE int32_t __QSUB(int32_t x, int32_t y)
{
    /* Widen before subtracting so the exact difference is always representable. */
    const int64_t diff = (int64_t)x - (int64_t)y;

    if (diff > 0x7fffffff) {
        return 0x7fffffff;
    }

    if (diff < (-2147483647 - 1)) {
        return (-2147483647 - 1);
    }

    return (int32_t)diff;
}
1175
/**
  \brief   Dual 16-bit signed multiply with single 32-bit accumulator.
  \details This function enables you to perform two signed 16-bit multiplications,
           adding both results to a 32-bit accumulate operand. The accumulation wraps modulo 2^32.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the product of each multiplication added to the accumulate value, as a 32-bit integer.
  \remark
                 p1 = x[15:0]  * y[15:0]      \n
                 p2 = x[31:16] * y[31:16]     \n
                 res[31:0] = p1 + p2 + sum[31:0]
 */
__ALWAYS_INLINE uint32_t __SMLAD(uint32_t x, uint32_t y, uint32_t sum)
{
    /* Sign-extend each halfword by flipping its sign bit and subtracting the
       bit's weight; this never left-shifts a signed value. */
    const int32_t x_lo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t x_hi = (int32_t)((x >> 16) ^ 0x8000u) - 0x8000;
    const int32_t y_lo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
    const int32_t y_hi = (int32_t)((y >> 16) ^ 0x8000u) - 0x8000;

    /* A product of two 16-bit values always fits in 32 bits. */
    const int32_t p1 = x_lo * y_lo;
    const int32_t p2 = x_hi * y_hi;

    /* Accumulate as unsigned: the running total may exceed the signed range. */
    return ((uint32_t)p1 + (uint32_t)p2 + sum);
}
1195
/**
  \brief   Pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator.
  \details This function enables you to perform two signed 16-bit multiplications with exchanged
           halfwords of the second operand, adding both results to a 32-bit accumulate operand.
           The accumulation is performed modulo 2^32 (no saturation).
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the product of each multiplication with exchanged halfwords of the second
                 operand added to the accumulate value, as a 32-bit integer.
  \remark
                 p1 = x[15:0]  * y[31:16]     \n
                 p2 = x[31:16] * y[15:0]      \n
                 res[31:0] = p1 + p2 + sum[31:0]
 */
__ALWAYS_INLINE uint32_t __SMLADX(uint32_t x, uint32_t y, uint32_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Cross-multiply the halves; accumulate as unsigned so overflow wraps. */
    return (uint32_t)(x_lo * y_hi) + (uint32_t)(x_hi * y_lo) + sum;
}
1216
/**
  \brief   Dual 16-bit signed multiply subtract with 32-bit accumulate.
  \details This function enables you to perform two 16-bit signed multiplications, take the
           difference of the products, subtracting the high halfword product from the low
           halfword product, and add the difference to a 32-bit accumulate operand.
           The accumulation is performed modulo 2^32 (no saturation).
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the difference of the product of each multiplication, added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[15:0]       \n
                 p2 = x[31:16] * y[31:16]      \n
                 res[31:0] = p1 - p2 + sum[31:0]
 */
__ALWAYS_INLINE uint32_t __SMLSD(uint32_t x, uint32_t y, uint32_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Unsigned arithmetic: the final accumulate wraps instead of overflowing. */
    return (uint32_t)(x_lo * y_lo) - (uint32_t)(x_hi * y_hi) + sum;
}
1237
/**
  \brief   Dual 16-bit signed multiply with exchange subtract with 32-bit accumulate.
  \details This function enables you to exchange the halfwords in the second operand, then perform two 16-bit
           signed multiplications. The difference of the products is added to a 32-bit accumulate operand.
           The accumulation is performed modulo 2^32 (no saturation).
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the difference of the product of each multiplication, added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[31:16]     \n
                 p2 = x[31:16] * y[15:0]      \n
                 res[31:0] = p1 - p2 + sum[31:0]
 */
__ALWAYS_INLINE uint32_t __SMLSDX(uint32_t x, uint32_t y, uint32_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Cross-multiply the halves; unsigned arithmetic makes the accumulate wrap. */
    return (uint32_t)(x_lo * y_hi) - (uint32_t)(x_hi * y_lo) + sum;
}
1257
/**
  \brief   Dual 16-bit signed multiply with single 64-bit accumulator.
  \details This function enables you to perform two signed 16-bit multiplications, adding both results
           to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit addition.
           This overflow is not detected if it occurs. Instead, the result wraps around modulo 2^64.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the product of each multiplication added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[15:0]      \n
                 p2 = x[31:16] * y[31:16]     \n
                 res[63:0] = p1 + p2 + sum[63:0]
 */
__ALWAYS_INLINE uint64_t __SMLALD(uint32_t x, uint32_t y, uint64_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Widen before adding: p1 + p2 can reach 2^31, which does not fit in int32_t. */
    const int64_t products = (int64_t)(x_lo * y_lo) + (int64_t)(x_hi * y_hi);

    /* Unsigned 64-bit addition wraps modulo 2^64. */
    return (uint64_t)products + sum;
}
1280
/**
  \brief   Dual 16-bit signed multiply with exchange with single 64-bit accumulator.
  \details This function enables you to exchange the halfwords of the second operand, and perform two
           signed 16-bit multiplications, adding both results to a 64-bit accumulate operand. Overflow
           is only possible as a result of the 64-bit addition. This overflow is not detected if it occurs.
           Instead, the result wraps around modulo 2^64.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the product of each multiplication added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[31:16]     \n
                 p2 = x[31:16] * y[15:0]      \n
                 res[63:0] = p1 + p2 + sum[63:0]
 */
__ALWAYS_INLINE uint64_t __SMLALDX(uint32_t x, uint32_t y, uint64_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Widen before adding: p1 + p2 can reach 2^31, which does not fit in int32_t. */
    const int64_t products = (int64_t)(x_lo * y_hi) + (int64_t)(x_hi * y_lo);

    /* Unsigned 64-bit addition wraps modulo 2^64. */
    return (uint64_t)products + sum;
}
1304
/**
  \brief   Dual 16-bit signed multiply subtract with 64-bit accumulate.
  \details This function enables you to perform two 16-bit signed multiplications, take the difference
           of the products, subtracting the high halfword product from the low halfword product, and add the
           difference to a 64-bit accumulate operand. Overflow cannot occur during the multiplications or the
           subtraction. Overflow can occur as a result of the 64-bit addition, and this overflow is not
           detected. Instead, the result wraps around modulo 2^64.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the difference of the product of each multiplication, added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[15:0]      \n
                 p2 = x[31:16] * y[31:16]     \n
                 res[63:0] = p1 - p2 + sum[63:0]
 */
__ALWAYS_INLINE uint64_t __SMLSLD(uint32_t x, uint32_t y, uint64_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Take the difference in 64 bits, then sign-extend it into the accumulate. */
    const int64_t difference = (int64_t)(x_lo * y_lo) - (int64_t)(x_hi * y_hi);

    /* Unsigned 64-bit addition wraps modulo 2^64. */
    return (uint64_t)difference + sum;
}
1327
/**
  \brief   Dual 16-bit signed multiply with exchange subtract with 64-bit accumulate.
  \details This function enables you to exchange the halfwords of the second operand, perform two 16-bit multiplications,
           adding the difference of the products to a 64-bit accumulate operand. Overflow cannot occur during the
           multiplications or the subtraction. Overflow can occur as a result of the 64-bit addition, and this overflow
           is not detected. Instead, the result wraps around modulo 2^64.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \param [in]  sum   accumulate value.
  \return        the difference of the product of each multiplication, added to the accumulate value.
  \remark
                 p1 = x[15:0]  * y[31:16]      \n
                 p2 = x[31:16] * y[15:0]       \n
                 res[63:0] = p1 - p2 + sum[63:0]
 */
__ALWAYS_INLINE uint64_t __SMLSLDX(uint32_t x, uint32_t y, uint64_t sum)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Cross-multiply the halves and take the difference in 64 bits. */
    const int64_t difference = (int64_t)(x_lo * y_hi) - (int64_t)(x_hi * y_lo);

    /* Unsigned 64-bit addition wraps modulo 2^64. */
    return (uint64_t)difference + sum;
}
1349
/**
  \brief   32-bit signed multiply with 32-bit truncated accumulator.
  \details This function enables you to perform a signed 32-bit multiplication, adding the most
           significant 32 bits of the 64-bit result to a 32-bit accumulate operand.
           The accumulation is performed modulo 2^32 (no saturation).
  \param [in]    x   first operand for multiplication.
  \param [in]    y   second operand for multiplication.
  \param [in]  sum   accumulate value.
  \return        the product of multiplication (most significant 32 bits) is added to the accumulate value, as a 32-bit integer.
  \remark
                 p = x * y      \n
                 res[31:0] = p[63:32] + sum[31:0]
 */
__ALWAYS_INLINE uint32_t __SMMLA(int32_t x, int32_t y, int32_t sum)
{
    /* A 32x32-bit signed product always fits in 64 bits. */
    const int64_t product = (int64_t)x * (int64_t)y;

    /* Take the upper word from the two's-complement bit pattern, avoiding
       a right shift of a negative signed value. */
    const uint32_t high_word = (uint32_t)((uint64_t)product >> 32);

    /* Unsigned addition wraps instead of invoking signed-overflow UB. */
    return high_word + (uint32_t)sum;
}
1366
/**
  \brief   Sum of dual 16-bit signed multiply.
  \details This function enables you to perform two 16-bit signed multiplications, adding the products together.
           The sum is formed modulo 2^32 (no saturation).
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \return        the sum of the products of the two 16-bit signed multiplications.
  \remark
                 p1 = x[15:0]  * y[15:0]      \n
                 p2 = x[31:16] * y[31:16]     \n
                 res[31:0] = p1 + p2
 */
__ALWAYS_INLINE uint32_t __SMUAD(uint32_t x, uint32_t y)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* p1 + p2 reaches 2^31 when all halfwords are 0x8000, so add as unsigned. */
    return (uint32_t)(x_lo * y_lo) + (uint32_t)(x_hi * y_hi);
}
1383
/**
  \brief   Dual 16-bit signed multiply returning difference.
  \details This function enables you to perform two 16-bit signed multiplications, taking the difference
           of the products by subtracting the high halfword product from the low halfword product.
  \param [in]    x   first 16-bit operands for each multiplication.
  \param [in]    y   second 16-bit operands for each multiplication.
  \return        the difference of the products of the two 16-bit signed multiplications.
  \remark
                 p1 = x[15:0]  * y[15:0]      \n
                 p2 = x[31:16] * y[31:16]     \n
                 res[31:0] = p1 - p2
 */
__ALWAYS_INLINE uint32_t __SMUSD(uint32_t x, uint32_t y)
{
    /* Extract the signed halfwords without left-shifting a signed value
       (which is undefined when the value is negative or overflows). */
    const int32_t x_lo = (int16_t)(x & 0xFFFFu);
    const int32_t x_hi = (int16_t)(x >> 16);
    const int32_t y_lo = (int16_t)(y & 0xFFFFu);
    const int32_t y_hi = (int16_t)(y >> 16);

    /* Subtract on the two's-complement bit patterns. */
    return (uint32_t)(x_lo * y_lo) - (uint32_t)(x_hi * y_hi);
}
1401
/**
  \brief   Dual extracted 8-bit to 16-bit signed addition.
  \details This function enables you to extract two 8-bit values from the second operand (at bit positions
           [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the first operand.
           Each 16-bit lane wraps independently; there is no carry between lanes.
  \param [in]    x   values added to the sign-extended to 16-bit values.
  \param [in]    y   two 8-bit values to be extracted and sign-extended.
  \return        the addition of x and y, where the 8-bit values in y[7:0] and
                 y[23:16] have been extracted and sign-extended prior to the addition.
  \remark
                 res[15:0]  = x[15:0] + SignExtended(y[7:0])      \n
                 res[31:16] = x[31:16] + SignExtended(y[23:16])
 */
__ALWAYS_INLINE uint32_t __SXTAB16(uint32_t x, uint32_t y)
{
    /* Sign-extend the two selected bytes, then continue in unsigned
       arithmetic so nothing can overflow or shift a negative value. */
    const uint32_t y_lo = (uint32_t)(int32_t)(int8_t)(y & 0xFFu);
    const uint32_t y_hi = (uint32_t)(int32_t)(int8_t)((y >> 16) & 0xFFu);

    /* Low lane: only the low 16 bits of the sum are kept. */
    const uint32_t lo = (x + y_lo) & 0x0000FFFFu;
    /* High lane: the shift discards everything above 16 bits of the lane sum. */
    const uint32_t hi = ((x >> 16) + y_hi) << 16;

    return hi | lo;
}
1419
/**
  \brief   Dual extracted 8-bit to 16-bit unsigned addition.
  \details This function enables you to extract two 8-bit values from one operand (at bit positions
           [7:0] and [23:16]), zero-extend them to 16 bits each, and add the results to the two
           16-bit values of another operand. Each 16-bit lane wraps independently.
  \param [in]    x   values added to the zero-extended to 16-bit values.
  \param [in]    y   two 8-bit values to be extracted and zero-extended.
  \return        the addition of x and y, where the 8-bit values in y[7:0] and
                 y[23:16] have been extracted and zero-extended prior to the addition.
  \remark
                 res[15:0]  = ZeroExt(y[7:0]   to 16 bits) + x[15:0]      \n
                 res[31:16] = ZeroExt(y[23:16] to 16 bits) + x[31:16]
 */
__ALWAYS_INLINE uint32_t __UXTAB16(uint32_t x, uint32_t y)
{
    /* Low lane: byte 0 of y plus the low halfword of x, truncated to 16 bits. */
    const uint32_t low_lane = ((y & 0xFFu) + (x & 0xFFFFu)) & 0xFFFFu;
    /* High lane: byte 2 of y plus the high halfword of x; the shift drops the carry. */
    const uint32_t high_lane = (((y >> 16) & 0xFFu) + (x >> 16)) << 16;

    return high_lane | low_lane;
}
1437
/**
  \brief   Dual extract 8-bits and sign extend each to 16-bits.
  \details This function enables you to extract two 8-bit values from an operand and sign-extend them to 16 bits each.
  \param [in]    x   two 8-bit values in x[7:0] and x[23:16] to be sign-extended.
  \return        the 8-bit values sign-extended to 16-bit values.\n
                 sign-extended value of x[7:0] in the low halfword of the return value.\n
                 sign-extended value of x[23:16] in the high halfword of the return value.
  \remark
                 res[15:0]  = SignExtended(x[7:0])       \n
                 res[31:16] = SignExtended(x[23:16])
 */
__ALWAYS_INLINE uint32_t __SXTB16(uint32_t x)
{
    /* Sign-extend via int8_t instead of left-shifting a signed value,
       which is undefined when the value is negative or overflows. */
    const uint32_t lo = (uint32_t)(int32_t)(int8_t)(x & 0xFFu) & 0x0000FFFFu;
    const uint32_t hi = (uint32_t)(int32_t)(int8_t)((x >> 16) & 0xFFu) << 16;

    return hi | lo;
}
1454
/**
  \brief   Dual extract 8-bits and zero-extend to 16-bits.
  \details This function enables you to extract two 8-bit values from an operand and zero-extend them to 16 bits each.
  \param [in]    x   two 8-bit values in x[7:0] and x[23:16] to be zero-extended.
  \return        the 8-bit values zero-extended to 16-bit values.\n
                 zero-extended value of x[7:0] in the low halfword of the return value.\n
                 zero-extended value of x[23:16] in the high halfword of the return value.
  \remark
                 res[15:0]  = ZeroExtended(x[7:0])       \n
                 res[31:16] = ZeroExtended(x[23:16])
 */
__ALWAYS_INLINE uint32_t __UXTB16(uint32_t x)
{
    /* Keeping bytes 0 and 2 in place and clearing bytes 1 and 3 is exactly
       a zero-extension of each byte into its 16-bit lane. */
    const uint32_t byte_lanes = 0x00FF00FFu;

    return x & byte_lanes;
}
1471
1472 #endif /* _CSI_SIMD_H_ */