include import "./string.zc" import "./mem.zc" // Memory Management. /// Allocate device memory for n elements of type T. fn cuda_alloc(n: usize) -> T* { let ptr: T* = NULL; cudaMalloc((void**)&ptr, n * sizeof(T)); return ptr; } /// Free device memory. fn cuda_free(ptr: void*) { cudaFree(ptr); } /// Copy bytes from host to device. fn cuda_copy_to_device(dst: void*, src: void*, bytes: usize) { cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); } /// Copy bytes from device to host. fn cuda_copy_to_host(dst: void*, src: void*, bytes: usize) { cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); } /// Copy bytes between device buffers. fn cuda_copy_device(dst: void*, src: void*, bytes: usize) { cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToDevice); } /// Set device memory to zero. fn cuda_zero(ptr: void*, bytes: usize) { cudaMemset(ptr, 0, bytes); } // Synchronization. /// Synchronize the device (wait for all kernels to complete). fn cuda_sync() { cudaDeviceSynchronize(); } // Thread Indexing. // Grid/Block Dimensions @device @inline fn grid_dim_x() -> int { return gridDim.x; } @device @inline fn grid_dim_y() -> int { return gridDim.y; } @device @inline fn grid_dim_z() -> int { return gridDim.z; } @device @inline fn block_dim_x() -> int { return blockDim.x; } @device @inline fn block_dim_y() -> int { return blockDim.y; } @device @inline fn block_dim_z() -> int { return blockDim.z; } // Block Indices @device @inline fn block_id_x() -> int { return blockIdx.x; } @device @inline fn block_id_y() -> int { return blockIdx.y; } @device @inline fn block_id_z() -> int { return blockIdx.z; } // Thread Indices @device @inline fn thread_id_x() -> int { return threadIdx.x; } @device @inline fn thread_id_y() -> int { return threadIdx.y; } @device @inline fn thread_id_z() -> int { return threadIdx.z; } // Convenience. @device @inline fn thread_id() -> int { return blockIdx.x * blockDim.x + threadIdx.x; } @device @inline fn block_id() -> int { return blockIdx.x; } @device @inline fn local_id() -> int { return threadIdx.x; } @device @inline fn block_size() -> int { return blockDim.x; } @device @inline fn grid_size() -> int { return gridDim.x; } // Device Info. /// Get the number of CUDA devices. fn cuda_device_count() -> int { let count: int = 0; cudaGetDeviceCount(&count); return count; } /// Set the active CUDA device. fn cuda_set_device(id: int) { cudaSetDevice(id); } // Error Handling. /// Get the last CUDA error code. fn cuda_last_error() -> int { let err: int; err = (int)cudaGetLastError(); return err; } /// Check if last CUDA operation succeeded. fn cuda_ok() -> bool { return cuda_last_error() == 0; } // Minimal raw block: required for cudaDeviceProp struct field access // The cudaDeviceProp struct cannot be declared in Zen-C without type conflicts raw { void _z_cuda_get_props(int dev, char* name, size_t* total_mem, int* sm_count, int* major, int* minor, int* max_threads, int* warp_size) { struct cudaDeviceProp prop; if (cudaGetDeviceProperties(&prop, dev) == 0) { strcpy(name, prop.name); *total_mem = prop.totalGlobalMem; *sm_count = prop.multiProcessorCount; *major = prop.major; *minor = prop.minor; *max_threads = prop.maxThreadsPerBlock; *warp_size = prop.warpSize; } } } extern fn _z_cuda_get_props(dev: int, name: char*, mem: usize*, sm: int*, maj: int*, min: int*, max_t: int*, warp: int*); struct CudaDeviceProp { name: String; total_global_mem: usize; multi_processor_count: int; major: int; minor: int; max_threads_per_block: int; warp_size: int; } struct CudaMemInfo { free: usize; total: usize; } fn cuda_device_properties(device_id: int) -> CudaDeviceProp { let mem: usize = 0; let sm: int = 0; let maj: int = 0; let min: int = 0; let max_t: int = 0; let warp: int = 0; let name_ptr = alloc_n(256); name_ptr[0] = 0; _z_cuda_get_props(device_id, name_ptr, &mem, &sm, &maj, &min, &max_t, &warp); let s = String::new(name_ptr); free(name_ptr); return CudaDeviceProp { name: s, total_global_mem: mem, multi_processor_count: sm, major: maj, minor: min, max_threads_per_block: max_t, warp_size: warp }; } fn cuda_driver_version() -> int { let d: int = 0; cudaDriverGetVersion(&d); return d; } fn cuda_runtime_version() -> int { let r: int = 0; cudaRuntimeGetVersion(&r); return r; } fn cuda_mem_info() -> CudaMemInfo { let f: usize = 0; let t: usize = 0; cudaMemGetInfo(&f, &t); return CudaMemInfo { free: f, total: t }; } fn cuda_device_reset() { cudaDeviceReset(); }