/* Fragment of a routine that widens num_points floats from inputVector into
 * doubles in outputVector, four at a time with 256-bit intrinsics and
 * unaligned loads/stores, followed by a scalar loop for the remainder.
 * NOTE(review): the line opening this definition, the declarations of
 * inputVal/ret and the closing braces are not visible in this view. */
63 const float* inputVector,
64 unsigned int num_points)
66 unsigned int number = 0;
/* Number of complete groups of four input floats. */
68 const unsigned int quarterPoints = num_points / 4;
70 const float* inputVectorPtr = (
const float*)inputVector;
71 double* outputVectorPtr = outputVector;
/* Vector loop: one iteration per complete group of four floats.
 * NOTE(review): no advance of inputVectorPtr or outputVectorPtr is visible in
 * this loop; the embedded numbering skips 77 and 81-84, so the increments may
 * simply be missing from this view -- confirm both pointers move by 4 per
 * iteration. */
75 for (; number < quarterPoints; number++) {
/* Load four packed floats; no alignment requirement on the address. */
76 inputVal = _mm_loadu_ps(inputVectorPtr);
/* Convert the four floats to four packed doubles. */
79 ret = _mm256_cvtps_pd(inputVal);
/* Store the four doubles; no alignment requirement on the address. */
80 _mm256_storeu_pd(outputVectorPtr, ret);
/* Scalar tail: resume at the first element not covered by a full group of
 * four and convert the remaining 0-3 elements one by one. */
85 number = quarterPoints * 4;
86 for (; number < num_points; number++) {
87 outputVector[number] = (double)(inputVector[number]);
/* Fragment of a routine that widens num_points floats from inputVector into
 * doubles in outputVector, four floats per iteration as two 128-bit
 * conversions with unaligned loads/stores, followed by a scalar loop for the
 * remainder.
 * NOTE(review): the line opening this definition, the declarations of
 * inputVal/ret and the closing braces are not visible in this view. */
97 const float* inputVector,
98 unsigned int num_points)
100 unsigned int number = 0;
/* Number of complete groups of four input floats. */
102 const unsigned int quarterPoints = num_points / 4;
104 const float* inputVectorPtr = (
const float*)inputVector;
105 double* outputVectorPtr = outputVector;
/* Vector loop: one iteration per complete group of four floats.
 * NOTE(review): no advance of inputVectorPtr is visible in this loop; the
 * embedded numbering skips 111-112, so the increment may simply be missing
 * from this view -- confirm it moves by 4 per iteration. */
109 for (; number < quarterPoints; number++) {
/* Load four packed floats; no alignment requirement on the address. */
110 inputVal = _mm_loadu_ps(inputVectorPtr);
/* Convert the two low floats to two packed doubles. */
113 ret = _mm_cvtps_pd(inputVal);
115 _mm_storeu_pd(outputVectorPtr, ret);
116 outputVectorPtr += 2;
/* Move the two high floats into the low half so they can be converted next. */
118 inputVal = _mm_movehl_ps(inputVal, inputVal);
/* Convert what were originally the two high floats. */
120 ret = _mm_cvtps_pd(inputVal);
122 _mm_storeu_pd(outputVectorPtr, ret);
123 outputVectorPtr += 2;
/* Scalar tail: resume at the first element not covered by a full group of
 * four and convert the remaining 0-3 elements one by one. */
126 number = quarterPoints * 4;
127 for (; number < num_points; number++) {
128 outputVector[number] = (double)(inputVector[number]);
/* Fragment of a routine that widens num_points floats from inputVector into
 * doubles in outputVector, four at a time with 256-bit intrinsics and aligned
 * loads/stores, followed by a scalar loop for the remainder.
 * The aligned intrinsics used here require a 16-byte-aligned load address
 * (_mm_load_ps) and a 32-byte-aligned store address (_mm256_store_pd).
 * NOTE(review): the line opening this definition, the declarations of
 * inputVal/ret and the closing braces are not visible in this view. */
164 const float* inputVector,
165 unsigned int num_points)
167 unsigned int number = 0;
/* Number of complete groups of four input floats. */
169 const unsigned int quarterPoints = num_points / 4;
171 const float* inputVectorPtr = (
const float*)inputVector;
172 double* outputVectorPtr = outputVector;
/* Vector loop: one iteration per complete group of four floats.
 * NOTE(review): no advance of inputVectorPtr is visible in this loop; the
 * embedded numbering skips 178-179, so the increment may simply be missing
 * from this view -- confirm it moves by 4 per iteration. */
176 for (; number < quarterPoints; number++) {
/* Aligned load of four packed floats. */
177 inputVal = _mm_load_ps(inputVectorPtr);
/* Convert the four floats to four packed doubles. */
180 ret = _mm256_cvtps_pd(inputVal);
/* Aligned store of the four doubles. */
181 _mm256_store_pd(outputVectorPtr, ret);
/* Step past the four doubles just written. */
183 outputVectorPtr += 4;
/* Scalar tail: resume at the first element not covered by a full group of
 * four and convert the remaining 0-3 elements one by one. */
186 number = quarterPoints * 4;
187 for (; number < num_points; number++) {
188 outputVector[number] = (double)(inputVector[number]);
/* Fragment of a routine that widens num_points floats from inputVector into
 * doubles in outputVector, four floats per iteration as two 128-bit
 * conversions with aligned loads/stores, followed by a scalar loop for the
 * remainder.
 * The aligned intrinsics used here (_mm_load_ps, _mm_store_pd) require
 * 16-byte-aligned addresses.
 * NOTE(review): the line opening this definition, the declarations of
 * inputVal/ret and the closing braces are not visible in this view. */
197 const float* inputVector,
198 unsigned int num_points)
200 unsigned int number = 0;
/* Number of complete groups of four input floats. */
202 const unsigned int quarterPoints = num_points / 4;
204 const float* inputVectorPtr = (
const float*)inputVector;
205 double* outputVectorPtr = outputVector;
/* Vector loop: one iteration per complete group of four floats.
 * NOTE(review): no advance of inputVectorPtr is visible in this loop; the
 * embedded numbering skips 211-212, so the increment may simply be missing
 * from this view -- confirm it moves by 4 per iteration. */
209 for (; number < quarterPoints; number++) {
/* Aligned load of four packed floats. */
210 inputVal = _mm_load_ps(inputVectorPtr);
/* Convert the two low floats to two packed doubles. */
213 ret = _mm_cvtps_pd(inputVal);
215 _mm_store_pd(outputVectorPtr, ret);
216 outputVectorPtr += 2;
/* Move the two high floats into the low half so they can be converted next. */
218 inputVal = _mm_movehl_ps(inputVal, inputVal);
/* Convert what were originally the two high floats. */
220 ret = _mm_cvtps_pd(inputVal);
222 _mm_store_pd(outputVectorPtr, ret);
223 outputVectorPtr += 2;
/* Scalar tail: resume at the first element not covered by a full group of
 * four and convert the remaining 0-3 elements one by one. */
226 number = quarterPoints * 4;
227 for (; number < num_points; number++) {
228 outputVector[number] = (double)(inputVector[number]);