15 年之前 · 578c319b4a
--- a/tests/experiments/latency/cuda-latency.c
+++ b/tests/experiments/latency/cuda-latency.c
@@ -19,6 +19,7 @@
 
				 #include <cuda.h>
			
 
				 #include <cuda_runtime.h>
			
 
				 
			
 
				+#include <assert.h>
			
 
				 #include <string.h>
			
 
				 #include <math.h>
			
 
				 #include <sys/types.h>
			
@@ -52,13 +53,21 @@ static cudaStream_t stream[2];
 
				 
			
 
				 void send_data(unsigned src, unsigned dst)
			
 
				 {
			
 
				+	cudaError_t cures;
			
 
				+
			
 
				 	/* Copy data from GPU to RAM */
			
 
				 #ifdef ASYNC
			
 
				-	cudaMemcpyAsync(cpu_buffer, gpu_buffer[src], buffer_size, cudaMemcpyDeviceToHost, stream[src]);
			
 
				-	cudaStreamSynchronize(stream[src]);
			
 
				+	cures = cudaMemcpyAsync(cpu_buffer, gpu_buffer[src], buffer_size, cudaMemcpyDeviceToHost, stream[src]);
			
 
				+	assert(!cures);
			
 
				+
			
 
				+	cures = cudaStreamSynchronize(stream[src]);
			
 
				+	assert(!cures);
			
 
				 #else
			
 
				-	cudaMemcpy(cpu_buffer, gpu_buffer[src], buffer_size, cudaMemcpyDeviceToHost);
			
 
				-	cudaThreadSynchronize();
			
 
				+	cures = cudaMemcpy(cpu_buffer, gpu_buffer[src], buffer_size, cudaMemcpyDeviceToHost);
			
 
				+	assert(!cures);
			
 
				+
			
 
				+	cures = cudaThreadSynchronize();
			
 
				+	assert(!cures);
			
 
				 #endif
			
 
				 
			
 
				 	/* Tell the other GPU that data is in RAM */
			
@@ -72,6 +81,8 @@ void send_data(unsigned src, unsigned dst)
 
				 
			
 
				 void recv_data(unsigned src, unsigned dst)
			
 
				 {
			
 
				+	cudaError_t cures;
			
 
				+
			
 
				 	/* Wait for the data to be in RAM */
			
 
				 	pthread_mutex_lock(&mutex_gpu);
			
 
				 	while (!data_is_available[dst])
			
@@ -83,11 +94,16 @@ void recv_data(unsigned src, unsigned dst)
 
				 
			
 
				 	/* Upload data */
			
 
				 #ifdef ASYNC
			
 
				-	cudaMemcpyAsync(gpu_buffer[dst], cpu_buffer, buffer_size, cudaMemcpyDeviceToHost, stream[dst]);
			
 
				-	cudaThreadSynchronize();
			
 
				+	cures = cudaMemcpyAsync(gpu_buffer[dst], cpu_buffer, buffer_size, cudaMemcpyDeviceToHost, stream[dst]);
			
 
				+	assert(!cures);
			
 
				+	cures = cudaThreadSynchronize();
			
 
				+	assert(!cures);
			
 
				 #else
			
 
				-	cudaMemcpy(gpu_buffer[dst], cpu_buffer, buffer_size, cudaMemcpyDeviceToHost);
			
 
				-	cudaThreadSynchronize();
			
 
				+	cures = cudaMemcpy(gpu_buffer[dst], cpu_buffer, buffer_size, cudaMemcpyHostToDevice);
			
 
				+	assert(!cures);
			
 
				+
			
 
				+	cures = cudaThreadSynchronize();
			
 
				+	assert(!cures);
			
 
				 #endif
			
 
				 }
			
 
				 
			
@@ -139,11 +155,14 @@ void *launch_gpu_thread(void *arg)
 
				 
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				+
			
 
				 	pthread_mutex_init(&mutex, NULL);
			
 
				 	pthread_cond_init(&cond, NULL);
			
 
				 	pthread_cond_init(&cond_go, NULL);
			
 
				 
			
 
				-	cudaHostAlloc(&cpu_buffer, buffer_size, cudaHostAllocPortable);
			
 
				+	cudaError_t cures;
			
 
				+	cures = cudaHostAlloc(&cpu_buffer, buffer_size, cudaHostAllocPortable);
			
 
				+	assert(!cures);
			
 
				 
			
 
				 	unsigned id;
			
 
				 	for (id = 0; id < 2; id++)