arm_min_f32.c 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_min_f32.c
  4. * Description: Minimum value of a floating-point vector
  5. *
  6. * $Date: 18. March 2019
  7. * $Revision: V1.6.0
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  #include "arm_math.h"
  #include <float.h>
  #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
  #include <limits.h>
  #endif
  32. /**
  33. @ingroup groupStats
  34. */
  35. /**
  36. @defgroup Min Minimum
  37. Computes the minimum value of an array of data.
  38. The function returns both the minimum value and its position within the array.
  39. There are separate functions for floating-point, Q31, Q15, and Q7 data types.
  40. */
  41. /**
  42. @addtogroup Min
  43. @{
  44. */
  45. /**
  46. @brief Minimum value of a floating-point vector.
  47. @param[in] pSrc points to the input vector
  48. @param[in] blockSize number of samples in input vector
  49. @param[out] pResult minimum value returned here
  50. @param[out] pIndex index of minimum value returned here
  51. @return none
  52. */
  53. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  54. void arm_min_f32(
  55. const float32_t * pSrc,
  56. uint32_t blockSize,
  57. float32_t * pResult,
  58. uint32_t * pIndex)
  59. {
  60. uint32_t blkCnt; /* loop counters */
  61. f32x4_t vecSrc;
  62. float32_t const *pSrcVec;
  63. f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);
  64. float32_t minValue = F32_MAX;
  65. uint32_t idx = blockSize;
  66. uint32x4_t indexVec;
  67. uint32x4_t curExtremIdxVec;
  68. float32_t tmp;
  69. mve_pred16_t p0;
  70. indexVec = vidupq_u32((uint32_t)0, 1);
  71. curExtremIdxVec = vdupq_n_u32(0);
  72. pSrcVec = (float32_t const *) pSrc;
  73. /* Compute 4 outputs at a time */
  74. blkCnt = blockSize >> 2U;
  75. while (blkCnt > 0U)
  76. {
  77. vecSrc = vldrwq_f32(pSrcVec);
  78. pSrcVec += 4;
  79. /*
  80. * Get current max per lane and current index per lane
  81. * when a max is selected
  82. */
  83. p0 = vcmpleq(vecSrc, curExtremValVec);
  84. curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
  85. curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
  86. indexVec = indexVec + 4;
  87. /*
  88. * Decrement the blockSize loop counter
  89. */
  90. blkCnt--;
  91. }
  92. /*
  93. * Get min value across the vector
  94. */
  95. minValue = vminnmvq(minValue, curExtremValVec);
  96. /*
  97. * set index for lower values to max possible index
  98. */
  99. p0 = vcmpleq(curExtremValVec, minValue);
  100. indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
  101. /*
  102. * Get min index which is thus for a max value
  103. */
  104. idx = vminvq(idx, indexVec);
  105. /*
  106. * tail
  107. */
  108. blkCnt = blockSize & 0x3;
  109. while (blkCnt > 0U)
  110. {
  111. /* Initialize minVal to the next consecutive values one by one */
  112. tmp = *pSrc++;
  113. /* compare for the minimum value */
  114. if (minValue > tmp)
  115. {
  116. /* Update the minimum value and it's index */
  117. minValue = tmp;
  118. idx = blockSize - blkCnt;
  119. }
  120. blkCnt--;
  121. }
  122. /*
  123. * Save result
  124. */
  125. *pIndex = idx;
  126. *pResult = minValue;
  127. }
  128. #else
  129. #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
  130. void arm_min_f32(
  131. const float32_t * pSrc,
  132. uint32_t blockSize,
  133. float32_t * pResult,
  134. uint32_t * pIndex)
  135. {
  136. float32_t maxVal1, out; /* Temporary variables to store the output value. */
  137. uint32_t blkCnt, outIndex; /* loop counter */
  138. float32x4_t outV, srcV;
  139. float32x2_t outV2;
  140. uint32x4_t idxV;
  141. static const uint32_t indexInit[4]={4,5,6,7};
  142. static const uint32_t countVInit[4]={0,1,2,3};
  143. uint32x4_t maxIdx;
  144. uint32x4_t index;
  145. uint32x4_t delta;
  146. uint32x4_t countV;
  147. uint32x2_t countV2;
  148. maxIdx = vdupq_n_u32(ULONG_MAX);
  149. delta = vdupq_n_u32(4);
  150. index = vld1q_u32(indexInit);
  151. countV = vld1q_u32(countVInit);
  152. /* Initialise the index value to zero. */
  153. outIndex = 0U;
  154. /* Load first input value that act as reference value for comparison */
  155. if (blockSize <= 3)
  156. {
  157. out = *pSrc++;
  158. blkCnt = blockSize - 1;
  159. while (blkCnt > 0U)
  160. {
  161. /* Initialize maxVal to the next consecutive values one by one */
  162. maxVal1 = *pSrc++;
  163. /* compare for the maximum value */
  164. if (out > maxVal1)
  165. {
  166. /* Update the maximum value and it's index */
  167. out = maxVal1;
  168. outIndex = blockSize - blkCnt;
  169. }
  170. /* Decrement the loop counter */
  171. blkCnt--;
  172. }
  173. }
  174. else
  175. {
  176. outV = vld1q_f32(pSrc);
  177. pSrc += 4;
  178. /* Compute 4 outputs at a time */
  179. blkCnt = (blockSize - 4 ) >> 2U;
  180. while (blkCnt > 0U)
  181. {
  182. srcV = vld1q_f32(pSrc);
  183. pSrc += 4;
  184. idxV = vcltq_f32(srcV, outV);
  185. outV = vbslq_f32(idxV, srcV, outV );
  186. countV = vbslq_u32(idxV, index,countV );
  187. index = vaddq_u32(index,delta);
  188. /* Decrement the loop counter */
  189. blkCnt--;
  190. }
  191. outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
  192. outV2 = vpmin_f32(outV2,outV2);
  193. out = vget_lane_f32(outV2,0);
  194. idxV = vceqq_f32(outV, vdupq_n_f32(out));
  195. countV = vbslq_u32(idxV, countV,maxIdx);
  196. countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
  197. countV2 = vpmin_u32(countV2,countV2);
  198. outIndex = vget_lane_u32(countV2,0);
  199. /* if (blockSize - 1U) is not multiple of 4 */
  200. blkCnt = (blockSize - 4 ) % 4U;
  201. while (blkCnt > 0U)
  202. {
  203. /* Initialize maxVal to the next consecutive values one by one */
  204. maxVal1 = *pSrc++;
  205. /* compare for the maximum value */
  206. if (out > maxVal1)
  207. {
  208. /* Update the maximum value and it's index */
  209. out = maxVal1;
  210. outIndex = blockSize - blkCnt ;
  211. }
  212. /* Decrement the loop counter */
  213. blkCnt--;
  214. }
  215. }
  216. /* Store the maximum value and it's index into destination pointers */
  217. *pResult = out;
  218. *pIndex = outIndex;
  219. }
  220. #else
  221. void arm_min_f32(
  222. const float32_t * pSrc,
  223. uint32_t blockSize,
  224. float32_t * pResult,
  225. uint32_t * pIndex)
  226. {
  227. float32_t minVal, out; /* Temporary variables to store the output value. */
  228. uint32_t blkCnt, outIndex; /* Loop counter */
  229. #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  230. uint32_t index; /* index of maximum value */
  231. #endif
  232. /* Initialise index value to zero. */
  233. outIndex = 0U;
  234. /* Load first input value that act as reference value for comparision */
  235. out = *pSrc++;
  236. #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  237. /* Initialise index of maximum value. */
  238. index = 0U;
  239. /* Loop unrolling: Compute 4 outputs at a time */
  240. blkCnt = (blockSize - 1U) >> 2U;
  241. while (blkCnt > 0U)
  242. {
  243. /* Initialize minVal to next consecutive values one by one */
  244. minVal = *pSrc++;
  245. /* compare for the minimum value */
  246. if (out > minVal)
  247. {
  248. /* Update the minimum value and it's index */
  249. out = minVal;
  250. outIndex = index + 1U;
  251. }
  252. minVal = *pSrc++;
  253. if (out > minVal)
  254. {
  255. out = minVal;
  256. outIndex = index + 2U;
  257. }
  258. minVal = *pSrc++;
  259. if (out > minVal)
  260. {
  261. out = minVal;
  262. outIndex = index + 3U;
  263. }
  264. minVal = *pSrc++;
  265. if (out > minVal)
  266. {
  267. out = minVal;
  268. outIndex = index + 4U;
  269. }
  270. index += 4U;
  271. /* Decrement loop counter */
  272. blkCnt--;
  273. }
  274. /* Loop unrolling: Compute remaining outputs */
  275. blkCnt = (blockSize - 1U) % 4U;
  276. #else
  277. /* Initialize blkCnt with number of samples */
  278. blkCnt = (blockSize - 1U);
  279. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  280. while (blkCnt > 0U)
  281. {
  282. /* Initialize minVal to the next consecutive values one by one */
  283. minVal = *pSrc++;
  284. /* compare for the minimum value */
  285. if (out > minVal)
  286. {
  287. /* Update the minimum value and it's index */
  288. out = minVal;
  289. outIndex = blockSize - blkCnt;
  290. }
  291. /* Decrement loop counter */
  292. blkCnt--;
  293. }
  294. /* Store the minimum value and it's index into destination pointers */
  295. *pResult = out;
  296. *pIndex = outIndex;
  297. }
  298. #endif /* #if defined(ARM_MATH_NEON) */
  299. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  300. /**
  301. @} end of Min group
  302. */