silx-kit · t20100 · Jun 17, 2021 · May 31, 2021 · May 31, 2021 · Jun 1, 2021
diff --git a/src/silx/opencl/test/test_doubleword.py b/src/silx/opencl/test/test_doubleword.py
@@ -32,7 +32,7 @@
 __contact__ = "[email protected]"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "18/05/2021"
+__date__ = "31/05/2021"
 
 import unittest
 import numpy
@@ -99,7 +99,7 @@ def tearDownClass(cls):
     def test_fast_sum2(self):
         test_kernel = ElementwiseKernel(self.ctx,
                       "float *a, float *b, float *res_h, float *res_l",
-                      "float2 tmp = fast_sum2(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+                      "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
                       preamble=self.doubleword)
         a_g = pyopencl.array.to_device(self.queue, self.ah)
         b_g = pyopencl.array.to_device(self.queue, self.bl)
@@ -113,7 +113,7 @@ def test_fast_sum2(self):
     def test_sum2(self):
         test_kernel = ElementwiseKernel(self.ctx,
                     "float *a, float *b, float *res_h, float *res_l",
-                    "float2 tmp = sum2(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
                     preamble=self.doubleword)
         a_g = pyopencl.array.to_device(self.queue, self.ah)
         b_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -127,7 +127,7 @@ def test_sum2(self):
     def test_prod2(self):
         test_kernel = ElementwiseKernel(self.ctx,
                     "float *a, float *b, float *res_h, float *res_l",
-                    "float2 tmp = prod2(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
                     preamble=self.doubleword)
         a_g = pyopencl.array.to_device(self.queue, self.ah)
         b_g = pyopencl.array.to_device(self.queue, self.bh)

diff --git a/src/silx/resources/opencl/doubleword.cl b/src/silx/resources/opencl/doubleword.cl
@@ -36,15 +36,15 @@
 #endif
 
 //Algorithm 1, p23, theorem 1.1.12. Requires e_x > e_y, valid if |x| > |y| 
-inline fp2 fast_sum2(fp x, fp y){
+inline fp2 fast_fp_plus_fp(fp x, fp y){
     X87_VOLATILE fp s = x + y;
     X87_VOLATILE fp z = s - x;
     fp e = y - z;
     return (fp2)(s, e);
 }
 
-//Algorithm 2, p24, same as fast_sum2 without the condition on e_x and e_y
-inline fp2 sum2(fp x, fp y){
+//Algorithm 2, p24, same as fast_fp_plus_fp without the condition on e_x and e_y
+inline fp2 fp_plus_fp(fp x, fp y){
     X87_VOLATILE fp s = x + y;
     X87_VOLATILE fp xp = s - y;
     X87_VOLATILE fp yp = s - xp;
@@ -54,56 +54,56 @@ inline fp2 sum2(fp x, fp y){
 }
 
 //Algorithm 3, p24: multiplication with a FMA
-inline fp2 prod2(fp x, fp y){
+inline fp2 fp_times_fp(fp x, fp y){
     fp p = x * y;
     fp e = fma(x, y, -p);
     return (fp2)(p, e);  
 }
 
 //Algorithm 7, p38: Addition of a FP to a DW. 10flop bounds:2u²+5u³
 inline fp2 dw_plus_fp(fp2 x, fp y){
-    fp2 s = sum2(x.s0, y);
+    fp2 s = fp_plus_fp(x.s0, y);
     X87_VOLATILE fp v = x.s1 + s.s1;
-    return fast_sum2(s.s0, v);
+    return fast_fp_plus_fp(s.s0, v);
 }
 
 //Algorithm 9, p40: addition of two DW: 20flop bounds:3u²+13u³
 inline fp2 dw_plus_dw(fp2 x, fp2 y){
-    fp2 s = sum2(x.s0, y.s0);
-    fp2 t = sum2(x.s1, y.s1);
-    fp2 v = fast_sum2(s.s0, s.s1 + t.s0);
-    return fast_sum2(v.s0, t.s1 + v.s1);
+    fp2 s = fp_plus_fp(x.s0, y.s0);
+    fp2 t = fp_plus_fp(x.s1, y.s1);
+    fp2 v = fast_fp_plus_fp(s.s0, s.s1 + t.s0);
+    return fast_fp_plus_fp(v.s0, t.s1 + v.s1);
 }
 
 //Algorithm 12, p49: Multiplication FP*DW: 6flops bounds:2u²
 inline fp2 dw_times_fp(fp2 x, fp y){
-    fp2 c = prod2(x.s0, y);
-    return fast_sum2(c.s0, fma(x.s1, y, c.s1));
+    fp2 c = fp_times_fp(x.s0, y);
+    return fast_fp_plus_fp(c.s0, fma(x.s1, y, c.s1));
 }
 
 //Algorithm 14, p52: Multiplication DW*DW, 8 flops bounds:6u²
 inline fp2 dw_times_dw(fp2 x, fp2 y){
-    fp2 c = prod2(x.s0, y.s0);
+    fp2 c = fp_times_fp(x.s0, y.s0);
     X87_VOLATILE fp l = fma(x.s1, y.s0, x.s0 * y.s1);
-    return fast_sum2(c.s0, c.s1 + l);
+    return fast_fp_plus_fp(c.s0, c.s1 + l);
 }
 
 //Algorithm 17, p55: Division DW / FP, 10flops bounds: 3.5u²
 inline fp2 dw_div_fp(fp2 x, fp y){
     X87_VOLATILE fp th = x.s0 / y;
-    fp2 pi = prod2(th, y);
+    fp2 pi = fp_times_fp(th, y);
     fp2 d = x - pi;
     X87_VOLATILE fp delta = d.s0 + d.s1;
     X87_VOLATILE fp tl = delta/y; 
-    return fast_sum2(th, tl);
+    return fast_fp_plus_fp(th, tl);
 }
 
 //Derived from algorithm 20, p64: Inversion 1/ DW, 22 flops
 inline fp2 inv_dw(fp2 y){
     X87_VOLATILE fp th = one/y.s0;
     X87_VOLATILE fp rh = fma(-y.s0, th, one);
     X87_VOLATILE fp rl = -y.s1 * th;
-    fp2 e = fast_sum2(rh, rl);
+    fp2 e = fast_fp_plus_fp(rh, rl);
     fp2 delta = dw_times_fp(e, th);
     return dw_plus_fp(delta, th);
 }