您可以尝试使用基于复指数的余弦定义:
j^2=-1.
存储exp((2 * PI*freqscale[currentcarrier] / fs)*j) 和exp(phase*j)。评估 cos(...) 然后在 for 循环中恢复到几个产品和添加,sin()、cos() 和 exp() 只被调用几次。
实现如下:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <complex.h>
#include <time.h>
#define PI 3.141592653589
typedef struct cos_plan{
double complex* expo;
int size;
}cos_plan;
double phase_func(double* calibdata, long size, double* freqscale, double fs, double phase, int currentcarrier){
double result=0; //initialization
for (int i = 0; i < size; i++){
result += calibdata[i] * cos ( (2 * PI*freqscale[currentcarrier] * i / fs) + (phase*(PI / 180.) - (PI / 2.)) );
//printf("i %d cos %g\n",i,cos ( (2 * PI*freqscale[currentcarrier] * i / fs) + (phase*(PI / 180.) - (PI / 2.)) ));
}
result = fabs(result / size);
return result;
}
double phase_func2(double* calibdata, long size, double* freqscale, double fs, double phase, int currentcarrier, cos_plan* plan){
//first, let's compute the exponentials:
//double complex phaseexp=cos(phase*(PI / 180.) - (PI / 2.))+sin(phase*(PI / 180.) - (PI / 2.))*I;
//double complex phaseexpm=conj(phaseexp);
double phasesin=sin(phase*(PI / 180.) - (PI / 2.));
double phasecos=cos(phase*(PI / 180.) - (PI / 2.));
if (plan->size<size){
double complex *tmp=realloc(plan->expo,size*sizeof(double complex));
if(tmp==NULL){fprintf(stderr,"realloc failed\n");exit(1);}
plan->expo=tmp;
plan->size=size;
}
plan->expo[0]=1;
//plan->expo[1]=exp(2 *I* PI*freqscale[currentcarrier]/fs);
plan->expo[1]=cos(2 * PI*freqscale[currentcarrier]/fs)+sin(2 * PI*freqscale[currentcarrier]/fs)*I;
//printf("%g %g\n",creall(plan->expo[1]),cimagl(plan->expo[1]));
for(int i=2;i<size;i++){
if(i%2==0){
plan->expo[i]=plan->expo[i/2]*plan->expo[i/2];
}else{
plan->expo[i]=plan->expo[i/2]*plan->expo[i/2+1];
}
}
//computing the result
double result=0; //initialization
for(int i=0;i<size;i++){
//double coss=0.5*creall(plan->expo[i]*phaseexp+conj(plan->expo[i])*phaseexpm);
double coss=creall(plan->expo[i])*phasecos-cimagl(plan->expo[i])*phasesin;
//printf("i %d cos %g\n",i,coss);
result+=calibdata[i] *coss;
}
result = fabs(result / size);
return result;
}
int main(){
//the parameters
long n=100000000;
double* calibdata=malloc(n*sizeof(double));
if(calibdata==NULL){fprintf(stderr,"malloc failed\n");exit(1);}
int freqnb=42;
double* freqscale=malloc(freqnb*sizeof(double));
if(freqscale==NULL){fprintf(stderr,"malloc failed\n");exit(1);}
for (int i = 0; i < freqnb; i++){
freqscale[i]=i*i*0.007+i;
}
double fs=n;
double phase=0.05;
//populate calibdata
for (int i = 0; i < n; i++){
calibdata[i]=i/((double)n);
calibdata[i]=calibdata[i]*calibdata[i]-calibdata[i]+0.007/(calibdata[i]+3.0);
}
//call to sample code
clock_t t;
t = clock();
double res=phase_func(calibdata,n, freqscale, fs, phase, 13);
t = clock() - t;
printf("first call got %g in %g seconds.\n",res,((float)t)/CLOCKS_PER_SEC);
//initialize
cos_plan plan;
plan.expo=malloc(n*sizeof(double complex));
plan.size=n;
t = clock();
res=phase_func2(calibdata,n, freqscale, fs, phase, 13,&plan);
t = clock() - t;
printf("second call got %g in %g seconds.\n",res,((float)t)/CLOCKS_PER_SEC);
//cleaning
free(plan.expo);
free(calibdata);
free(freqscale);
return 0;
}
使用gcc main.c -o main -std=c99 -lm -Wall -O3 编译。使用您提供的代码,在我的计算机上使用 size=100000000 需要 8 秒,而 建议的解决方案的执行时间需要 1.5 秒...这并不令人印象深刻,但也不容忽视。
提出的解决方案不涉及在 for 循环中对 cos 或 sin 的任何调用。事实上,只有乘法和加法。瓶颈要么是内存带宽,要么是通过平方求幂中的测试和对内存的访问(很可能是第一个问题,因为我添加了一个额外的复数数组)。
c中的复数见:
如果问题是内存带宽,则需要并行性……直接计算cos会更容易。如果freqscale[currentcarrier] / fs 是整数,则可以进行额外的简化。您的问题非常接近Discrete Cosine Transform 的计算,目前的技巧接近离散傅里叶变换,而 FFTW 库非常擅长计算这些变换。
请注意,由于失去意义,当前代码可能会产生不准确的结果:size 很大时,result 可能比 cos(...)*calibdata[] 大得多。使用部分总和可以解决问题。