Greed

Здравствуйте. Понял, что не до конца понимаю структуру greed и решил написать небольшую программку, что бы просто посмотреть как что заполняется..

  1. __global__ void Kernel(int * d_result, int * d_blockID){
  2.  
  3.         int idx = blockIdx.x * blockDim.x + threadIdx.x;
  4.         d_result[idx] = threadIdx.x;
  5.         d_blockID[idx] = blockIdx.x;
  6.  
  7. }
  8.  
  9. int main(int argc, char *argv[]){
  10.         int i;
  11.         int num = 100;
  12.         int numByt = num *  sizeof (int);
  13.         int * h_threadID = new int [num];
  14.         int * h_blockID = new int [num];
  15.  
  16.         int * d_threadID;
  17.         int * d_blockID;
  18.         cudaMalloc ((void**)&d_threadID, numByt);
  19.         cudaMalloc ((void**)&d_blockID, numByt);
  20.  
  21.         dim3 threads(32, 1, 1);
  22.         dim3 blocks(4, 1);
  23.         dim3 grid(1);
  24.  
  25.         for (i = 0; i < numByt; i++){
  26.                 h_threadID[i] = 0;
  27.                 h_blockID[i] = 0;
  28.         }
  29.  
  30.         cudaMemcpy(d_threadID, h_threadID, numByt, cudaMemcpyHostToDevice);
  31.         cudaMemcpy(d_blockID, h_blockID, numByt, cudaMemcpyHostToDevice);
  32.  
  33.         Kernel<<<blocks, threads>>>(d_threadID, d_blockID);
  34.  
  35.         cudaMemcpy(h_threadID, d_threadID, numByt, cudaMemcpyDeviceToHost);
  36.         cudaMemcpy(h_blockID, d_blockID, numByt, cudaMemcpyDeviceToHost);
  37.  
  38.         FILE *fp;
  39.  
  40.         if ((fp=freopen("out", "w", stdout)) == NULL){
  41.                 printf("He удается открыть файл.\n");
  42.                 exit(1);
  43.         }
  44.         for (i = 0; i < numByt; i++){
  45.                 printf("threadID[%d] = %d, blockID = %d\n",i, h_threadID[i], h_blockID[i]);
  46.         }
  47.         fclose(fp);
  48. }

Все абсолютно просто.. нить передает свой индекс и блок ID.. Но тут с удивлением обнаружил, что в результате получаю:

threadID[0] = 0, blockID = 0
threadID[1] = 1, blockID = 0
threadID[2] = 2, blockID = 0
threadID[3] = 3, blockID = 0
threadID[4] = 4, blockID = 0
threadID[5] = 5, blockID = 0
threadID[6] = 6, blockID = 0
threadID[7] = 7, blockID = 0
threadID[8] = 8, blockID = 0
threadID[9] = 9, blockID = 0
threadID[10] = 10, blockID = 0
threadID[11] = 11, blockID = 0
threadID[12] = 12, blockID = 0
threadID[13] = 13, blockID = 0
threadID[14] = 14, blockID = 0
threadID[15] = 15, blockID = 0
threadID[16] = 16, blockID = 0
threadID[17] = 17, blockID = 0
threadID[18] = 18, blockID = 0
threadID[19] = 19, blockID = 0
threadID[20] = 20, blockID = 0
threadID[21] = 21, blockID = 0
threadID[22] = 22, blockID = 0
threadID[23] = 23, blockID = 0
threadID[24] = 24, blockID = 0
threadID[25] = 25, blockID = 0
threadID[26] = 26, blockID = 0
threadID[27] = 27, blockID = 0
threadID[28] = 28, blockID = 0
threadID[29] = 29, blockID = 0
threadID[30] = 30, blockID = 0
threadID[31] = 31, blockID = 0
threadID[32] = 0, blockID = 1
threadID[33] = 1, blockID = 1
threadID[34] = 2, blockID = 1
threadID[35] = 3, blockID = 1
threadID[36] = 4, blockID = 1
threadID[37] = 5, blockID = 1
threadID[38] = 6, blockID = 1
threadID[39] = 7, blockID = 1
threadID[40] = 8, blockID = 1
threadID[41] = 9, blockID = 1
threadID[42] = 10, blockID = 1
threadID[43] = 11, blockID = 1
threadID[44] = 12, blockID = 1
threadID[45] = 13, blockID = 1
threadID[46] = 14, blockID = 1
threadID[47] = 15, blockID = 1
threadID[48] = 16, blockID = 1
threadID[48] = 16, blockID = 1
threadID[49] = 17, blockID = 1
threadID[50] = 18, blockID = 1
threadID[51] = 19, blockID = 1
threadID[52] = 20, blockID = 1
threadID[53] = 21, blockID = 1
threadID[54] = 22, blockID = 1
threadID[55] = 23, blockID = 1
threadID[56] = 24, blockID = 1
threadID[57] = 25, blockID = 1
threadID[58] = 26, blockID = 1
threadID[59] = 27, blockID = 1
threadID[60] = 28, blockID = 1
threadID[61] = 29, blockID = 1
threadID[62] = 30, blockID = 1
threadID[63] = 31, blockID = 1
threadID[64] = 0, blockID = 2
threadID[65] = 1, blockID = 2
threadID[66] = 2, blockID = 2
threadID[67] = 3, blockID = 2
threadID[68] = 4, blockID = 2
threadID[69] = 5, blockID = 2
threadID[70] = 6, blockID = 2
threadID[71] = 7, blockID = 2
threadID[72] = 8, blockID = 2
threadID[73] = 9, blockID = 2
threadID[74] = 10, blockID = 2
threadID[75] = 11, blockID = 2
threadID[76] = 12, blockID = 2
threadID[77] = 13, blockID = 2
threadID[78] = 14, blockID = 2
threadID[79] = 15, blockID = 2
threadID[80] = 16, blockID = 2
threadID[81] = 17, blockID = 2
threadID[82] = 18, blockID = 2
threadID[83] = 19, blockID = 2
threadID[84] = 20, blockID = 2
threadID[85] = 21, blockID = 2
threadID[86] = 22, blockID = 2
threadID[87] = 23, blockID = 2
threadID[88] = 24, blockID = 2
threadID[89] = 25, blockID = 2
threadID[90] = 26, blockID = 2
threadID[91] = 27, blockID = 2
threadID[92] = 28, blockID = 2
threadID[93] = 29, blockID = 2
threadID[94] = 30, blockID = 2
threadID[95] = 31, blockID = 2
threadID[96] = 0, blockID = 3
threadID[97] = 1, blockID = 3
threadID[98] = 2, blockID = 3
threadID[99] = 3, blockID = 3
threadID[100] = 0, blockID = 0
threadID[101] = 0, blockID = 0
threadID[102] = 0, blockID = 0
threadID[103] = 0, blockID = 0
threadID[104] = 0, blockID = 0
threadID[105] = 0, blockID = 0

После 100 элемента, ничего не выводиться... а...ну еще появляются не понятные 1, 2 и 3 с 136 по 203 элемент... Может кто-нибудь объяснить подобное поведение?

Forums: 

constants

11. int num = 100;

и

21. dim3 threads(32, 1, 1);
22. dim3 blocks(4, 1);

P.S. О, Captcha появилась...