
Category: Android platform

2015-08-04 20:12:48


An OpenCL program consists of two parts: one that executes on a device (a GPU, for example) and one that runs on the host. The device-side code is the part that actually delivers the "heterogeneous" and "parallel" computation. To run code on a device, the programmer writes a special function called a kernel, in the OpenCL C language, which takes a subset of C and adds some restrictions, keywords, and data types. The host-side program uses the OpenCL API to manage what runs on the device. The host API is defined in C, and bindings also exist for higher-level languages such as C++, Java, and Python.
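As a quick illustration of those language additions (this snippet is only an illustration and is not part of the program below; the kernel name vec_add and its arguments are placeholders), a device-side kernel that adds two integer arrays element by element could look like this:

__kernel void vec_add(__global const int *a,   // input array in device global memory
                      __global const int *b,   // input array in device global memory
                      __global int *c)         // output array
{
    int i = get_global_id(0);  // index of this work item in the global NDRange
    c[i] = a[i] + b[i];        // each work item computes one element
}

The __kernel qualifier, the __global address space and get_global_id() are exactly the kind of additions to C that the kernel language brings. The complete host-side program below walks through the usual API sequence: platform, device, context, command queue, buffer, program, kernel, and finally the NDRange launch.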

/*
    This example uses iostream/cout, so build it as C++:
        g++ main.cpp -lOpenCL
*/
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <CL/cl.h>
using namespace std;

/* verify that every element was doubled by the kernel */
void check_result(const int *buf, const int len) {
    int i;
    for (i = 0; i < len; i++) {
        if (buf[i] != (i + 1) * 2) {
            cout << "Result error!" << endl;
            break;
        }
    }
    if (i == len)
        cout << "Result ok." << endl;
}

/* fill the host buffer with 1, 2, 3, ... */
void init_buf(int *buf, int len) {
    int i;
    for (i = 0; i < len; i++) {
        buf[i] = i + 1;
    }
}

int main(void) {
    cl_int ret;

    /** step 1: get platform */
    cl_uint num_platforms;
    ret = clGetPlatformIDs(0, NULL, &num_platforms); // query platform count
    if ((CL_SUCCESS != ret) || (num_platforms < 1)) {
        cout << "Error getting platform number: " << ret << endl;
        return 0;
    }
    cl_platform_id platform_id = NULL;
    ret = clGetPlatformIDs(1, &platform_id, NULL); // get the first platform id
    if (CL_SUCCESS != ret) {
        cout << "Error getting platform id: " << ret << endl;
        return 0;
    }

    /** step 2: get device */
    cl_uint num_devices;
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
    if ((CL_SUCCESS != ret) || (num_devices < 1)) {
        cout << "Error getting GPU device number: " << ret << endl;
        return 0;
    }
    cl_device_id device_id = NULL;
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
    if (CL_SUCCESS != ret) {
        cout << "Error getting GPU device id: " << ret << endl;
        return 0;
    }

    /** step 3: create context */
    cl_context_properties props[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
    cl_context context = NULL;
    context = clCreateContext(props, 1, &device_id, NULL, NULL, &ret);
    if ((CL_SUCCESS != ret) || (NULL == context)) {
        cout << "Error creating context: " << ret << endl;
        return 0;
    }

    /** step 4: create command queue */
    cl_command_queue command_queue = NULL;
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    if ((CL_SUCCESS != ret) || (NULL == command_queue)) {
        cout << "Error creating command queue: " << ret << endl;
        return 0;
    }

    /** step 5: create memory object */
    cl_mem mem_obj = NULL;
    int *host_buffer = NULL;
    const int ARRAY_SIZE = 1000;
    const int BUF_SIZE = ARRAY_SIZE * sizeof(int);
    // create and init host buffer
    host_buffer = (int *)malloc(BUF_SIZE);
    init_buf(host_buffer, ARRAY_SIZE);
    // create opencl memory object using host ptr
    mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BUF_SIZE, host_buffer, &ret);
    if ((CL_SUCCESS != ret) || (NULL == mem_obj)) {
        cout << "Error creating buffer: " << ret << endl;
        return 0;
    }

    /** step 6: create program */
    const char *kernelSource =
        "__kernel void test(__global int *pInOut)\n"
        "{\n"
        "    int index = get_global_id(0);\n"
        "    pInOut[index] += pInOut[index];\n"
        "}\n";
    cl_program program = NULL;
    // create program
    program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource, NULL, &ret);
    if ((CL_SUCCESS != ret) || (NULL == program)) {
        cout << "Error creating program: " << ret << endl;
        return 0;
    }
    // build program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (CL_SUCCESS != ret) {
        cout << "Error building program: " << ret << endl;
        return 0;
    }

    /** step 7: create kernel */
    cl_kernel kernel = NULL;
    kernel = clCreateKernel(program, "test", &ret);
    if ((CL_SUCCESS != ret) || (NULL == kernel)) {
        cout << "Error creating kernel: " << ret << endl;
        return 0;
    }

    /** step 8: set kernel arguments */
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_obj);
    if (CL_SUCCESS != ret) {
        cout << "Error setting kernel argument: " << ret << endl;
        return 0;
    }

    /** step 9: set work group size */
    cl_uint work_dim = 1; // most OpenCL devices support at most 3 dimensions
    size_t global_work_size[] = { ARRAY_SIZE };
    size_t *local_work_size = NULL; // let the OpenCL runtime decide how to split work items into work groups

    /** step 10: run kernel */
    ret = clEnqueueNDRangeKernel(command_queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL);
    if (CL_SUCCESS != ret) {
        cout << "Error enqueue NDRange: " << ret << endl;
        return 0;
    }

    /** step 11: get result (the blocking map waits for the kernel to finish) */
    int *device_buffer = (int *)clEnqueueMapBuffer(command_queue, mem_obj, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, BUF_SIZE, 0, NULL, NULL, &ret);
    if ((CL_SUCCESS != ret) || (NULL == device_buffer)) {
        cout << "Error map buffer: " << ret << endl;
        return 0;
    }
    // check result
    check_result(device_buffer, ARRAY_SIZE);
    // unmap the buffer before releasing it
    clEnqueueUnmapMemObject(command_queue, mem_obj, device_buffer, 0, NULL, NULL);

    /** step 12: release all resources */
    if (NULL != kernel) clReleaseKernel(kernel);
    if (NULL != program) clReleaseProgram(program);
    if (NULL != mem_obj) clReleaseMemObject(mem_obj);
    if (NULL != command_queue) clReleaseCommandQueue(command_queue);
    if (NULL != context) clReleaseContext(context);
    if (NULL != host_buffer) free(host_buffer);
    return 0;
}
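One thing the listing does not show: when clBuildProgram fails (typically returning CL_BUILD_PROGRAM_FAILURE), the compiler's error messages can be retrieved with clGetProgramBuildInfo and CL_PROGRAM_BUILD_LOG. A minimal sketch, reusing the program and device_id variables from the listing (the helper name print_build_log is just an example, not part of the original code):

static void print_build_log(cl_program program, cl_device_id device_id)
{
    size_t log_size = 0;
    // first ask for the size of the build log, then fetch the text itself
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    char *log = (char *)malloc(log_size + 1);
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
    log[log_size] = '\0';
    cout << "Build log:" << endl << log << endl;
    free(log);
}

Calling this helper inside the error branch after clBuildProgram makes kernel syntax errors much easier to diagnose than the bare error code alone.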
