diff --git a/Samples/src/CustomPi/CustomPi.ml b/Samples/src/CustomPi/CustomPi.ml index e97672d..3180b7e 100644 --- a/Samples/src/CustomPi/CustomPi.ml +++ b/Samples/src/CustomPi/CustomPi.ml @@ -34,6 +34,10 @@ let ray = 10.0;; kernel gpuPi : point Spoc.Vector.vcustom -> Spoc.Vector.vint32 -> int -> float -> unit = "kernels/CustomPi" "pi" +kernel gpuPi_complex : Spoc.Vector.vcomplex32 -> Spoc.Vector.vint32 -> int -> float -> unit = "kernels/CustomPi" "pi" + + + kernel gpuPi_double : point Spoc.Vector.vcustom -> Spoc.Vector.vint32 -> int -> float -> unit = "kernels/CustomPi" "pi_double" @@ -54,7 +58,46 @@ let gpuPi_d dev (block,grid) (gpuField, vbool, nbPoint, ray)= (gpuField, vbool, nbPoint, (ray *. ray)) ;; + +(** [gpuPi_complex vbool dev size] builds a [complex32] vector of [size] elements whose [re] and [im] fields are each set with [Random.float ray], launches the [gpuPi_complex] kernel on [dev] with that vector, [vbool], [nbPoint] and [ray *. ray], then sums [vbool] with [Tools.fold_left] and prints the resulting PI estimate on stdout. The block size is 1 thread on an OpenCL CPU device and 256 otherwise. This binding is not recursive, so [gpuPi_complex] inside the body is the kernel declared earlier; after this definition the name refers to this function. NOTE(review): [nbPoint] is not bound inside this function -- presumably a top-level value outside this hunk; confirm it is consistent with [size]. *) +let gpuPi_complex vbool dev size = + let gpuField = Vector.create complex32 size in + for i = 0 to size - 1 do + (* element i gets a fresh random complex value *) + gpuField.[<i>] <- {re = Random.float ray; im = Random.float ray;}; + done; + let threadsPerBlock = match dev.Devices.specific_info with + | Devices.OpenCLInfo clI -> + (match clI.Devices.device_type with + | Devices.CL_DEVICE_TYPE_CPU -> 1 + | _ -> 256) + | _ -> 256 in + let blocksPerGrid = + (((size / 32) + threadsPerBlock) - 1) / threadsPerBlock in + let block = + { + Spoc.Kernel.blockX = threadsPerBlock; + Spoc.Kernel.blockY = 1; + Spoc.Kernel.blockZ = 1; + } + and grid = + { + Spoc.Kernel.gridX = blocksPerGrid; + Spoc.Kernel.gridY = 1; + Spoc.Kernel.gridZ = 1; + } + in + Spoc.Kernel.run dev (block,grid) gpuPi_complex + (gpuField,vbool, nbPoint, (ray *. ray)); + let pio4 = + Int32.to_int + (Tools.fold_left (fun a b -> Int32.add a b) Int32.zero vbool) in + Printf.printf "GPU Complex Computation : PI = %d/%d = %.10g\n" pio4 size + ((4. *. (float pio4)) /. 
(float size)); + Pervasives.flush stdout; +;; + let gpuPI gpuField vbool dev size = + + let threadsPerBlock = match dev.Devices.specific_info with | Devices.OpenCLInfo clI -> (match clI.Devices.device_type with @@ -156,5 +199,9 @@ let _ = Printf.printf "GPU Computation : PI = %d/%d = %.10g\n" !pio4 !size ((4. *. (float !pio4)) /. (float !size)); Pervasives.flush stdout; - )))) + Spoc.Mem.auto_transfers true; + gpuPi_complex vbool !dev !size; + )))) + + diff --git a/Spoc/extension/kernels_ext.ml b/Spoc/extension/kernels_ext.ml index e1bc33c..d545f85 100644 --- a/Spoc/extension/kernels_ext.ml +++ b/Spoc/extension/kernels_ext.ml @@ -298,7 +298,7 @@ let rec parseInvTyp t i= | TyId (_, id) when (String.compare (string_of_ident id) "acc_acc_Spoc_Vector_vcomplex32") = 0 -> incr idx; - arg_string := "Spoc.Kernel.VComplex32 "^i^" "^(!arg_string); + arg_string := "Spoc.Kernel.VComplex32 "^i^" "^(!arg_string); arg_string2 :=i^" "^(!arg_string2) | TyId (_, id) when (String.compare (string_of_ident id) "acc_acc_Spoc_Vector_vint32") = 0 -> @@ -520,6 +520,7 @@ let relaxed _loc nv = | TyId (l, id) when ((string_of_ident id) = "acc_acc_Spoc_Vector_vchar") -> relaxed _loc nv | TyId (l, id) when ((string_of_ident id) = "acc_acc_Spoc_Vector_vbool") -> relaxed _loc nv | TyId (l, id) when ((string_of_ident id) = "acc_acc_Spoc_Vector_vcustom") -> relaxed _loc nv + | TyId (l, id) when ((string_of_ident id) = "acc_acc_Spoc_Vector_vcomplex32") -> relaxed _loc nv | TyApp (_loc, t1, t2) -> gen_inv_id t1 _loc nv | _ -> (ExId (_loc,