且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

通过cuFFT缩放反向FFT

更新时间:2023-02-26 22:18:34

通过引入cuFFT回调函数,可以执行由cuFFT可以通过将标准化操作定义为 __ device __ 函数直接嵌入 cufftExecC2C 调用中。

With the introduction of the cuFFT callback feature, the normalization required by the inverse FFT performed by the cuFFT can be embedded directly within the cufftExecC2C call by defining the normalization operation as a __device__ function.

除了cuFFT用户指南,对于cuFFT回调功能,请参阅

Besides the cuFFT User Guide, for the cuFFT callback features, see

CUDA Pro提示:使用cuFFT回调进行自定义数据处理

下面是通过cuFFT回调实现IFFT规范化的示例。

Below is an example of implementing the IFFT normalization by cuFFT callback.

    #include <stdio.h>
    #include <assert.h>

    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"

    #include <cufft.h>
    #include <cufftXt.h>

    /********************/
    /* CUDA ERROR CHECK */
    /********************/
    #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
    inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
    {
        if (code != cudaSuccess)
        {
            fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort) exit(code);
        }
    }

    /*********************/
    /* CUFFT ERROR CHECK */
    /*********************/
    // See http://***.com/questions/16267149/cufft-error-handling
    #ifdef _CUFFT_H_
    static const char *_cudaGetErrorEnum(cufftResult error)
    {
        switch (error)
        {
            case CUFFT_SUCCESS:
                return "CUFFT_SUCCESS";

            case CUFFT_INVALID_PLAN:
                return "CUFFT_INVALID_PLAN";

            case CUFFT_ALLOC_FAILED:
                return "CUFFT_ALLOC_FAILED";

            case CUFFT_INVALID_TYPE:
                 return "CUFFT_INVALID_TYPE";

            case CUFFT_INVALID_VALUE:
                return "CUFFT_INVALID_VALUE";

            case CUFFT_INTERNAL_ERROR:
                return "CUFFT_INTERNAL_ERROR";

            case CUFFT_EXEC_FAILED:
                return "CUFFT_EXEC_FAILED";

            case CUFFT_SETUP_FAILED:
                return "CUFFT_SETUP_FAILED";

            case CUFFT_INVALID_SIZE:
                return "CUFFT_INVALID_SIZE";

            case CUFFT_UNALIGNED_DATA:
                return "CUFFT_UNALIGNED_DATA";
        }

        return "<unknown>";
    }
    #endif

    #define cufftSafeCall(err)      __cufftSafeCall(err, __FILE__, __LINE__)
    inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
    {
        if( CUFFT_SUCCESS != err) {
        fprintf(stderr, "CUFFT error in file '%s', line %d\n %s\nerror %d: %s\nterminating!\n",__FILE__, __LINE__,err, \
                                _cudaGetErrorEnum(err)); \
        cudaDeviceReset(); assert(0); \
        }
    }

    __device__ void IFFT_Scaling(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPtr) {

        float *scaling_factor = (float*)callerInfo;

        float2 output;
        output.x = cuCrealf(element);
        output.y = cuCimagf(element);

        output.x = output.x / scaling_factor[0];
        output.y = output.y / scaling_factor[0];

        ((float2*)dataOut)[offset] = output;
}

    __device__ cufftCallbackStoreC d_storeCallbackPtr = IFFT_Scaling;

    /********/
    /* MAIN */
    /********/
    int main() {

        const int N = 16;

        cufftHandle plan;

        float2 *h_input             = (float2*)malloc(N*sizeof(float2));
        float2 *h_output1           = (float2*)malloc(N*sizeof(float2));
        float2 *h_output2           = (float2*)malloc(N*sizeof(float2));

        float2 *d_input;            gpuErrchk(cudaMalloc((void**)&d_input, N*sizeof(float2)));
        float2 *d_output1;          gpuErrchk(cudaMalloc((void**)&d_output1, N*sizeof(float2)));
        float2 *d_output2;          gpuErrchk(cudaMalloc((void**)&d_output2, N*sizeof(float2)));

        float *h_scaling_factor     = (float*)malloc(sizeof(float));
        h_scaling_factor[0] = 16.0f;
        float *d_scaling_factor;    gpuErrchk(cudaMalloc((void**)&d_scaling_factor, sizeof(float)));
        gpuErrchk(cudaMemcpy(d_scaling_factor, h_scaling_factor, sizeof(float), cudaMemcpyHostToDevice));

        for (int i=0; i<N; i++) {
            h_input[i].x = 1.0f;
            h_input[i].y = 0.f;
        }

        gpuErrchk(cudaMemcpy(d_input, h_input, N*sizeof(float2), cudaMemcpyHostToDevice));

        cufftSafeCall(cufftPlan1d(&plan, N, CUFFT_C2C, 1));

        cufftSafeCall(cufftExecC2C(plan, d_input, d_output1, CUFFT_FORWARD));
        gpuErrchk(cudaMemcpy(h_output1, d_output1, N*sizeof(float2), cudaMemcpyDeviceToHost));
        for (int i=0; i<N; i++) printf("Direct transform - %d - (%f, %f)\n", i, h_output1[i].x, h_output1[i].y);

        cufftCallbackStoreC h_storeCallbackPtr;
        gpuErrchk(cudaMemcpyFromSymbol(&h_storeCallbackPtr, d_storeCallbackPtr, sizeof(h_storeCallbackPtr)));

        cufftSafeCall(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX, (void **)&d_scaling_factor));

        cufftSafeCall(cufftExecC2C(plan, d_output1, d_output2, CUFFT_INVERSE));
        gpuErrchk(cudaMemcpy(h_output2, d_output2, N*sizeof(float2), cudaMemcpyDeviceToHost));
        for (int i=0; i<N; i++) printf("Inverse transform - %d - (%f, %f)\n", i, h_output2[i].x, h_output2[i].y);

        cufftSafeCall(cufftDestroy(plan));

        gpuErrchk(cudaFree(d_input));
        gpuErrchk(cudaFree(d_output1));
        gpuErrchk(cudaFree(d_output2));

        return 0;
    }

EDIT

执行回调操作的时刻由 cufftXtSetCallback 的调用 CUFFT_CB_ST_COMPLEX >。请注意,您可以使用相同的cuFFT计划加载和存储回调。

The "moment" the callback operation is performed is specified by CUFFT_CB_ST_COMPLEX in the call to cufftXtSetCallback. Notice that you can then have load and store callbacks with the same cuFFT plan.