ENG  RUS  Timus Online Judge
Online Judge
Problems
Authors
Online contests
About Online Judge
Frequently asked questions
Site news
Webboard
Links
Problem set
Submit solution
Judge status
Guide
Register
Update your info
Authors ranklist
Current contest
Scheduled contests
Past contests
Rules
back to board

Discussion of Problem 1000. A+B Problem

A + B
Posted by GiorgiNV 25 Mar 2012 21:22
// Nvidia CUDA Driver API 4.1
//========================
#include <cuda.h>
#include <stdio.h>
#include <device_launch_parameters.h>
#include <Windows.h>

//========================
#pragma comment(lib,"cuda.lib")

//========================
int wmain()
{
    //
    CUdevice     cuDev;
    CUcontext    cuCtx;
    CUmodule     cuModl;
    CUfunction   cuFunc;
    //
    CUdeviceptr  pDmem[3];
    CUstream     stream[4];
    CUevent      start;
    CUevent      end;
    size_t       paramsize;
    void*        kernelparams[4];
    float        time;
    //
    cuInit(0);
    cuDeviceGet(&cuDev,0);
    cuCtxCreate(&cuCtx,CU_CTX_SCHED_AUTO,cuDev);
    //
    cuEventCreate(&start,0);
    cuEventCreate(&end,0);
    //
    for(int i=0; i<4; i++)
    {
        if(cuStreamCreate(&stream[i],0))
        {
            return 0;
        }
    }
    //
    cuModuleLoad(&cuModl,"matrixtile.ptx");
    cuModuleGetFunction(&cuFunc,cuModl,"add");
    //
    for(int i=0; i<3; i++)
    {
        if(cuMemAlloc(&pDmem[i],sizeof(int)))
        {
            return 0;
        }
    }

    int a = 1;
    int b = 8;
    int r;
    cuEventRecord(start,0);
    if(cuMemcpyHtoDAsync(pDmem[0],(void*)&a,sizeof(int),stream[0]))
    {
        return 0;
    }
    if(cuMemcpyHtoDAsync(pDmem[1],(void*)&b,sizeof(int),stream[1]))
    {
        return 0;
    }
    if(cuMemcpyHtoDAsync(pDmem[2],(void*)&r,sizeof(int),stream[2]))
    {
        return 0;
    }
    //
    paramsize = sizeof(pDmem);
    kernelparams[0] = &pDmem[0];
    kernelparams[1] = &pDmem[1];
    kernelparams[2] = &pDmem[2];
    kernelparams[3] = &paramsize;

    //
    cuLaunchKernel(cuFunc,1,1,1,1,1,1,0,stream[3],kernelparams,0);
    cuEventRecord(end,0);
    cuEventSynchronize(end);
    cuEventElapsedTime(&time,start,end);
    wprintf(L"Time = %.7f\n\n",time);
    //
    cuMemcpyDtoHAsync(&r,pDmem[2],sizeof(int),stream[3]);
    cuCtxSynchronize();
    //
    cuModuleUnload(cuModl);
    cuCtxDestroy(cuCtx);
    wprintf(L"result = %d\n",r);
    //
    ReadFile(GetStdHandle(STD_INPUT_HANDLE),0,0,0,0);
    return 0;
}

Time = 0.0076 milliseconds

Edited by author 25.03.2012 21:23
Re: A + B
Posted by Giorgi Pataraia [Tbilisi SU] 25 Mar 2012 22:22
you're genius bro ;) :D nice solution :)
Re: A + B
Posted by GiorgiNV 25 Mar 2012 22:27
:D