// Shared-memory read microbenchmark (CUDA).
//
// NOTE(review): this copy of the file is damaged and will not build as-is.
//   * All line breaks were collapsed, so every `//` comment now swallows the
//     code that follows it on the same physical line.
//   * Text between a `<` and a following `>` appears to have been stripped:
//       - the first `#include` has no header name (presumably <stdio.h> and/or
//         <limits.h>, since printf and ULONG_MAX are used -- TODO confirm);
//       - KernelReadSharedMem stops at `for (i=threadIdx.x; i` and the text
//         resumes at `>>>(d_duration, d_data, d_timeStamp)`, i.e. the rest of
//         the kernel body plus the signature, allocations and launch
//         configuration of the host function are missing;
//       - the inner result loop reads `for(i=0; i maxStamp)`, so its bound and
//         the conditions that select entries / update minStamp are missing.
//   Restore the file from its original source before changing any logic.
//
// What is visible:
//   * Macros: SM_BASE (16), ILP (8), OVERHEAD (36), SMEM_SIZE (4096).
//   * get_smid(): returns the value of the PTX special register %smid, read
//     with inline asm.
//   * KernelReadSharedMem(duration, data, data2): declares two __shared__ int
//     arrays of SMEM_SIZE elements each; the remainder of the body is lost.
//   * Host driver (invoked from main as ReadSharedMem(threadNum, blockNum)):
//     after the launch it copies d_duration, d_timeStamp and d_data back to the
//     host (Db.x * Dg.x elements each), then for j = 0..15 resets
//     maxStamp/minStamp/warpNum, scans the results, and prints
//     "j threadNum blockNum value" where
//       value = 8*1.279*ILP*warpNum*32 / (maxStamp - minStamp - OVERHEAD).
//     It then frees the three device and three host buffers and calls
//     cudaDeviceReset().
//   * main(): prints a tab-separated column header, then calls ReadSharedMem
//     for thread counts {32, 64, 128, 256, 512} and block counts
//     SM_BASE, 2*SM_BASE, ..., 6*SM_BASE.
//
// NOTE(review) -- items to confirm once the full source is restored:
//   * cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is the
//     replacement. The calls placed after each blocking cudaMemcpy look
//     redundant.
//   * No CUDA API return value is checked, and there is no cudaGetLastError()
//     after the kernel launch.
//   * The per-SM loop bound is the literal 16; presumably SM_BASE was intended.
//   * maxStamp - minStamp - OVERHEAD is an unsigned divisor with no guard; if
//     no entry is selected for a given j (minStamp stays ULONG_MAX, maxStamp
//     stays 0) the result is meaningless -- verify against the lost condition.
//   * The header printed by main names six columns
//     (BlockNum, ThreadNum, SM_id, Duration, start, end) but each result row
//     prints four values in a different order.
//   * threadNum is declared with 6 elements but only 5 are initialised/used.
//   * "suncthreads()" in the OVERHEAD comment is presumably a typo for
//     __syncthreads().
#include #include "repeat.h" #include "cuda_runtime.h" #define SM_BASE 16 //the SM number of the device #define ILP 8 #define OVERHEAD 36 //the overhead (latency cycles) of a clock() and a suncthreads() #define SMEM_SIZE 4096 //SMEM_SIZE=threadNum*ILP __noinline__ __device__ unsigned int get_smid(void) { unsigned int ret; asm("mov.u32 %0, %smid;":"=r"(ret) ); return ret; } __global__ void KernelReadSharedMem(unsigned int *duration, int *data, unsigned long int *data2){ __shared__ int sData_1[SMEM_SIZE]; __shared__ int sData_2[SMEM_SIZE]; int i; for (i=threadIdx.x; i>>(d_duration, d_data, d_timeStamp); cudaThreadSynchronize (); //copy back result cudaMemcpy( (void*) h_duration, (void*) d_duration, sizeof(unsigned int) * Db.x * Dg.x, cudaMemcpyDeviceToHost); cudaThreadSynchronize (); cudaMemcpy( (void*) h_timeStamp, (void*) d_timeStamp, sizeof(unsigned long int) * Db.x * Dg.x, cudaMemcpyDeviceToHost); cudaThreadSynchronize (); cudaMemcpy( (void*) h_data, (void*) d_data, sizeof(int) * Db.x * Dg.x, cudaMemcpyDeviceToHost); cudaThreadSynchronize (); //print result, SM id, latency ///int m = Dg.x/SM_BASE; unsigned long maxStamp = 0, minStamp = ULONG_MAX; int j; for(j = 0; j < 16; j++) { maxStamp = 0; minStamp = ULONG_MAX; int warpNum = 0; for(i=0; i maxStamp) maxStamp = h_timeStamp[i] + h_duration[i]; warpNum++; } } printf("%d %d %d %f\n", j, threadNum, blockNum, 8*1.279*ILP*warpNum*32/(maxStamp-minStamp-OVERHEAD)); } //free the memory space cudaFree(d_duration);cudaFree(d_data);cudaFree(d_timeStamp); free(h_duration); free(h_data); free(h_timeStamp); cudaDeviceReset(); } int main(){ //printf("Shared Memory Throughput:\n"); //printf("BlockNum\t ThreadNum\t SM_id \tThroughputPerWarp(GB/s)\t TimeStamp\n"); printf("BlockNum\tThreadNum\tSM_id\tDuration\tstart\tend\n"); int blockNum, i, currentThreadNum; int threadNum[6] ={32, 64, 128, 256 ,512}; for(i=0; i<5; i++){ currentThreadNum = threadNum[i]; for (blockNum=SM_BASE; blockNum <= 6*SM_BASE ; blockNum+=SM_BASE){ 
ReadSharedMem(currentThreadNum, blockNum); //printf("\n\n"); } } return 0; }