diff --git a/python/taichi/lang/common_ops.py b/python/taichi/lang/common_ops.py
new file mode 100644
index 0000000000000..c8743cfe62254
--- /dev/null
+++ b/python/taichi/lang/common_ops.py
@@ -0,0 +1,78 @@
+class TaichiOperations:
+    """Mixin that routes Python operators to the matching `ti` operation.
+
+    Each dunder forwards to the same-named function on the `taichi`
+    package (`ti.add`, `ti.sub`, ...).  `taichi` is imported inside every
+    method instead of at module level -- presumably to avoid a circular
+    import while the package is still loading (TODO confirm).
+    """
+
+    def __neg__(self):
+        import taichi as ti
+        return ti.neg(self)
+
+    def __abs__(self):
+        import taichi as ti
+        return ti.abs(self)
+
+    # Reflected (`__r*__`) variants pass `other` first so it stays the
+    # left-hand operand of the underlying operation.
+    def __add__(self, other):
+        import taichi as ti
+        return ti.add(self, other)
+
+    def __radd__(self, other):
+        import taichi as ti
+        return ti.add(other, self)
+
+    def __sub__(self, other):
+        import taichi as ti
+        return ti.sub(self, other)
+
+    def __rsub__(self, other):
+        import taichi as ti
+        return ti.sub(other, self)
+
+    def __mul__(self, other):
+        import taichi as ti
+        return ti.mul(self, other)
+
+    def __rmul__(self, other):
+        import taichi as ti
+        return ti.mul(other, self)
+
+    def __truediv__(self, other):
+        import taichi as ti
+        return ti.truediv(self, other)
+
+    def __rtruediv__(self, other):
+        import taichi as ti
+        return ti.truediv(other, self)
+
+    def __floordiv__(self, other):
+        import taichi as ti
+        return ti.floordiv(self, other)
+
+    def __rfloordiv__(self, other):
+        import taichi as ti
+        return ti.floordiv(other, self)
+
+    def __mod__(self, other):
+        import taichi as ti
+        return ti.mod(self, other)
+
+    # Reflected modulo, so that `scalar % taichi_obj` dispatches here
+    # instead of raising TypeError.
+    def __rmod__(self, other):
+        import taichi as ti
+        return ti.mod(other, self)
+
+    # `modulo` is accepted only to match Python's three-argument `pow`
+    # protocol; it is not forwarded to `ti.pow`.
+    def __pow__(self, other, modulo=None):
+        import taichi as ti
+        return ti.pow(self, other)
+
+    def __rpow__(self, other, modulo=None):
+        import taichi as ti
+        return ti.pow(other, self)
diff --git a/python/taichi/lang/expr.py b/python/taichi/lang/expr.py
index ee9129d16a40c..64a13cac23e7a 100644
--- a/python/taichi/lang/expr.py
+++ b/python/taichi/lang/expr.py
@@ -1,10 +1,11 @@
 from .core import taichi_lang_core
 from .util import *
+from .common_ops import TaichiOperations
 import traceback
 
 
 # Scalar, basic data type
-class Expr:
+class Expr(TaichiOperations):
     materialize_layout_callback = None
     layout_materialized = False
 
@@ -46,52 +47,6 @@ def stack_info():
         # remove the confusing last line
         return '\n'.join(raw.split('\n')[:-3]) + '\n'
 
-    def __add__(self, other):
-        other = Expr(other)
-        return Expr(taichi_lang_core.expr_add(self.ptr, other.ptr),
-                    tb=self.stack_info())
-
-    __radd__ = __add__
-
-    def __neg__(self):
-        return Expr(taichi_lang_core.expr_neg(self.ptr), tb=self.stack_info())
-
-    def __sub__(self, other):
-        other = Expr(other)
-        return Expr(taichi_lang_core.expr_sub(self.ptr, other.ptr),
-                    tb=self.stack_info())
-
-    def __rsub__(self, other):
-        other = Expr(other)
-        return Expr(taichi_lang_core.expr_sub(other.ptr, self.ptr))
-
-    def __mul__(self, other):
-        if is_taichi_class(other) and hasattr(other, '__rmul__'):
-            return other.__rmul__(self)
-        else:
-            other = Expr(other)
-            return Expr(taichi_lang_core.expr_mul(self.ptr, other.ptr))
-
-    __rmul__ = __mul__
-
-    def __truediv__(self, other):
-        return Expr(taichi_lang_core.expr_truediv(self.ptr, Expr(other).ptr))
-
-    def __rtruediv__(self, other):
-        return Expr(taichi_lang_core.expr_truediv(Expr(other).ptr, self.ptr))
-
-    def __floordiv__(self, other):
-        return Expr(taichi_lang_core.expr_floordiv(self.ptr, Expr(other).ptr))
-
-    def __rfloordiv__(self, other):
-        return Expr(taichi_lang_core.expr_floordiv(Expr(other).ptr, self.ptr))
-
-    def __mod__(self, other):
-        other = Expr(other)
-        quotient = Expr(taichi_lang_core.expr_floordiv(self.ptr, other.ptr))
-        multiply = Expr(taichi_lang_core.expr_mul(other.ptr, quotient.ptr))
-        return Expr(taichi_lang_core.expr_sub(self.ptr, multiply.ptr))
-
     def __iadd__(self, other):
         self.atomic_add(other)
 
@@ -99,17 +54,16 @@ def __isub__(self, other):
         self.atomic_sub(other)
 
     def __imul__(self, other):
-        self.assign(Expr(taichi_lang_core.expr_mul(self.ptr, other.ptr)))
+        import taichi as ti
+        self.assign(ti.mul(self, other))
 
     def __itruediv__(self, other):
-        self.assign(
-            Expr(taichi_lang_core.expr_truediv(self.ptr,
-                                               Expr(other).ptr)))
+        import taichi as ti
+        self.assign(ti.truediv(self, other))
 
     def __ifloordiv__(self, other):
-        self.assign(
-            Expr(taichi_lang_core.expr_floordiv(self.ptr,
-                                                Expr(other).ptr)))
+        import taichi as ti
+        self.assign(ti.floordiv(self, other))
 
     def __iand__(self, other):
         self.atomic_and(other)
@@ -120,6 +74,7 @@ def __ior__(self, other):
     def __ixor__(self, other):
         self.atomic_xor(other)
 
+    # TODO: move to ops.py: ti.cmp_le
     def __le__(self, other):
         other = Expr(other)
         return Expr(taichi_lang_core.expr_cmp_le(self.ptr, other.ptr))
@@ -314,39 +269,6 @@ def fill(self, val):
         from .meta import fill_tensor
         fill_tensor(self, val)
 
-    def __rpow__(self, power, modulo=None):
-        # Python will try Matrix.__pow__ first so we don't have to worry whether `power` is `Matrix`
-        return Expr(power).__pow__(self, modulo)
-
-    def __pow__(self, power, modulo=None):
-        import taichi as ti
-        if ti.is_taichi_class(power):
-            return power.element_wise_binary(lambda x, y: pow(y, x), self)
-        if not isinstance(power, int) or abs(power) > 100:
-            return Expr(taichi_lang_core.expr_pow(self.ptr, Expr(power).ptr))
-        if power == 0:
-            return Expr(1)
-        negative = power < 0
-        power = abs(power)
-        tmp = self
-        ret = None
-        while power:
-            if power & 1:
-                if ret is None:
-                    ret = tmp
-                else:
-                    ret = ti.expr_init(ret * tmp)
-            tmp = ti.expr_init(tmp * tmp)
-            power >>= 1
-        if negative:
-            return 1 / ret
-        else:
-            return ret
-
-    def __abs__(self):
-        import taichi as ti
-        return ti.abs(self)
-
     def __ti_int__(self):
         import taichi as ti
         return ti.cast(self, ti.get_runtime().default_ip)
diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py
index 5a0fe619876b1..23331880858a6 100644
--- a/python/taichi/lang/matrix.py
+++ b/python/taichi/lang/matrix.py
@@ -4,6 +4,7 @@
 import numbers
 import numpy as np
 from .util import to_numpy_type, to_pytorch_type
+from .common_ops import TaichiOperations
 
 
 def broadcast_if_scalar(func):
@@ -15,7 +16,7 @@ def broadcasted(self, other, *args, **kwargs):
     return broadcasted
 
 
-class Matrix:
+class Matrix(TaichiOperations):
     is_taichi_class = True
 
     def __init__(self,
@@ -42,12 +43,14 @@ def __init__(self,
                     assert row.n == rows[
                         0].n, "input vectors must be the same shape"
                 self.m = rows[0].n
+                # l-value copy:
                 self.entries = [row(i) for row in rows for i in range(row.n)]
             elif isinstance(rows[0], list):
                 for row in rows:
                     assert len(row) == len(
                         rows[0]), "input lists must be the same shape"
                 self.m = len(rows[0])
+                # l-value copy:
                 self.entries = [x for row in rows for x in row]
             else:
                 raise Exception(
@@ -168,113 +171,12 @@ def __matmul__(self, other):
                     ret(i, j).assign(ret(i, j) + self(i, k) * other(k, j))
         return ret
 
-    @broadcast_if_scalar
-    def __pow__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j)**other(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __rpow__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(other(i, j)**self(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __div__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) / other(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __rtruediv__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(other(i, j) / self(i, j))
-        return ret
-
     def broadcast(self, scalar):
         ret = Matrix(self.n, self.m, empty=True)
         for i in range(self.n * self.m):
             ret.entries[i] = scalar
         return ret
 
-    @broadcast_if_scalar
-    def __truediv__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) / other(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __floordiv__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) // other(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __mul__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) * other(i, j))
-        return ret
-
-    __rmul__ = __mul__
-
-    @broadcast_if_scalar
-    def __add__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) + other(i, j))
-        return ret
-
-    __radd__ = __add__
-
-    @broadcast_if_scalar
-    def __sub__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(self(i, j) - other(i, j))
-        return ret
-
-    def __neg__(self):
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(-self(i, j))
-        return ret
-
-    @broadcast_if_scalar
-    def __rsub__(self, other):
-        assert self.n == other.n and self.m == other.m
-        ret = Matrix(self.n, self.m)
-        for i in range(self.n):
-            for j in range(self.m):
-                ret(i, j).assign(other(i, j) - self(i, j))
-        return ret
-
     def linearize_entry_id(self, *args):
         assert 1 <= len(args) <= 2
         if len(args) == 1 and isinstance(args[0], (list, tuple)):
@@ -382,7 +284,7 @@ def abs(self):
 
     def trace(self):
         assert self.n == self.m
-        sum = self(0, 0)
+        sum = expr.Expr(self(0, 0))
         for i in range(1, self.n):
             sum = sum + self(i, i)
         return sum
@@ -393,8 +295,9 @@ def inverse(self):
             return Matrix([1 / self(0, 0)])
         elif self.n == 2:
             inv_det = impl.expr_init(1.0 / self.determinant(self))
+            # Discussion: https://github.com/taichi-dev/taichi/pull/943#issuecomment-626344323
             return inv_det * Matrix([[self(1, 1), -self(0, 1)],
-                                     [-self(1, 0), self(0, 0)]])
+                                     [-self(1, 0), self(0, 0)]]).variable()
         elif self.n == 3:
             n = 3
             import taichi as ti
@@ -527,6 +430,7 @@ def diag(dim, val):
     def loop_range(self):
         return self.entries[0]
 
+    # TODO
     @broadcast_if_scalar
     def augassign(self, other, op):
         if not isinstance(other, Matrix):
diff --git a/python/taichi/lang/ops.py b/python/taichi/lang/ops.py
index 9bf64010532f1..bb712cbf1113b 100644
--- a/python/taichi/lang/ops.py
+++ b/python/taichi/lang/ops.py
@@ -20,10 +20,12 @@ def stack_info():
 def unary(foo):
     import taichi as ti
 
+    imp_foo = lambda x: foo(Expr(x))
+
     @functools.wraps(foo)
     def wrapped(a):
         if ti.is_taichi_class(a):
-            return a.element_wise_unary(foo)
+            return a.element_wise_unary(imp_foo)
         else:
             return foo(Expr(a))
 
@@ -37,15 +39,17 @@ def wrapped(a):
 def binary(foo):
     import taichi as ti
 
+    imp_foo = lambda x, y: foo(Expr(x), Expr(y))
+    rev_foo = lambda x, y: foo(Expr(y), Expr(x))
+
     @functools.wraps(foo)
     def wrapped(a, b):
         if ti.is_taichi_class(a):
-            return a.element_wise_binary(foo, b)
+            return a.element_wise_binary(imp_foo, b)
         elif ti.is_taichi_class(b):
-            rev_foo = lambda x, y: foo(y, x)
             return b.element_wise_binary(rev_foo, a)
         else:
-            return foo(Expr(a), Expr(b))
+            return imp_foo(a, b)
 
     binary_ops.append(wrapped)
     return wrapped
@@ -84,6 +88,11 @@ def sqr(obj):
     return obj * obj
 
 
+@unary
+def neg(expr):
+    return Expr(taichi_lang_core.expr_neg(expr.ptr), tb=stack_info())
+
+
 @unary
 def sin(expr):
     return Expr(taichi_lang_core.expr_sin(expr.ptr), tb=stack_info())
@@ -121,32 +130,32 @@ def ceil(expr):
 
 @unary
 def inv(expr):
-    return Expr(taichi_lang_core.expr_inv(expr.ptr))
+    return Expr(taichi_lang_core.expr_inv(expr.ptr), tb=stack_info())
 
 
 @unary
 def tan(expr):
-    return Expr(taichi_lang_core.expr_tan(expr.ptr))
+    return Expr(taichi_lang_core.expr_tan(expr.ptr), tb=stack_info())
 
 
 @unary
 def tanh(expr):
-    return Expr(taichi_lang_core.expr_tanh(expr.ptr))
+    return Expr(taichi_lang_core.expr_tanh(expr.ptr), tb=stack_info())
 
 
 @unary
 def exp(expr):
-    return Expr(taichi_lang_core.expr_exp(expr.ptr))
+    return Expr(taichi_lang_core.expr_exp(expr.ptr), tb=stack_info())
 
 
 @unary
 def log(expr):
-    return Expr(taichi_lang_core.expr_log(expr.ptr))
+    return Expr(taichi_lang_core.expr_log(expr.ptr), tb=stack_info())
 
 
 @unary
 def abs(expr):
-    return Expr(taichi_lang_core.expr_abs(expr.ptr))
+    return Expr(taichi_lang_core.expr_abs(expr.ptr), tb=stack_info())
 
 
 def random(dt=None):
@@ -156,29 +165,102 @@ def random(dt=None):
     return Expr(taichi_lang_core.make_rand_expr(dt))
 
 
+@binary
+def add(a, b):
+    return Expr(taichi_lang_core.expr_add(a.ptr, b.ptr), tb=stack_info())
+
+
+@binary
+def sub(a, b):
+    return Expr(taichi_lang_core.expr_sub(a.ptr, b.ptr), tb=stack_info())
+
+
+@binary
+def mul(a, b):
+    return Expr(taichi_lang_core.expr_mul(a.ptr, b.ptr), tb=stack_info())
+
+
+@binary
+def mod(a, b):  # Built from primitives as a - b * (a // b).
+    quotient = Expr(taichi_lang_core.expr_floordiv(a.ptr, b.ptr))
+    multiply = Expr(taichi_lang_core.expr_mul(b.ptr, quotient.ptr))
+    return Expr(taichi_lang_core.expr_sub(a.ptr, multiply.ptr), tb=stack_info())
+
+
+@binary
+def raw_pow(a, b):
+    return Expr(taichi_lang_core.expr_pow(a.ptr, b.ptr), tb=stack_info())
+
+
+# TODO: move this to a C++ pass (#944)
+def pow(self, power):
+    import taichi as ti
+    if not isinstance(power, int):
+        return raw_pow(self, power)
+    if power == 0:
+        # TODO: remove the hack, use {Expr,Matrix}.dup().fill(1)
+        # also note that this can be solved by #940
+        return self * 0 + Expr(1)
+
+    negative = power < 0
+    # Why not simply use `power = abs(power)`?
+    # Because the builtin `abs` is shadowed by the `ti.abs` defined above.
+    if negative:
+        power = -power
+
+    tmp = self
+    ret = None
+    while power:
+        if power & 1:
+            if ret is None:
+                ret = tmp
+            else:
+                ret = ti.expr_init(ret * tmp)
+        tmp = ti.expr_init(tmp * tmp)
+        power >>= 1
+
+    if negative:
+        return 1 / ret
+    else:
+        return ret
+
+
+# NEXT: add matpow(self, power)
+
+
+@binary
+def floordiv(a, b):
+    return Expr(taichi_lang_core.expr_floordiv(a.ptr, b.ptr), tb=stack_info())
+
+
+@binary
+def truediv(a, b):
+    return Expr(taichi_lang_core.expr_truediv(a.ptr, b.ptr), tb=stack_info())
+
+
 @binary
 def max(a, b):
-    return Expr(taichi_lang_core.expr_max(a.ptr, b.ptr))
+    return Expr(taichi_lang_core.expr_max(a.ptr, b.ptr), tb=stack_info())
 
 
 @binary
 def min(a, b):
-    return Expr(taichi_lang_core.expr_min(a.ptr, b.ptr))
+    return Expr(taichi_lang_core.expr_min(a.ptr, b.ptr), tb=stack_info())
 
 
 @binary
 def atan2(a, b):
-    return Expr(taichi_lang_core.expr_atan2(a.ptr, b.ptr))
+    return Expr(taichi_lang_core.expr_atan2(a.ptr, b.ptr), tb=stack_info())
 
 
 @binary
 def raw_div(a, b):
-    return Expr(taichi_lang_core.expr_div(a.ptr, b.ptr))
+    return Expr(taichi_lang_core.expr_div(a.ptr, b.ptr), tb=stack_info())
 
 
 @binary
 def raw_mod(a, b):
-    return Expr(taichi_lang_core.expr_mod(a.ptr, b.ptr))
+    return Expr(taichi_lang_core.expr_mod(a.ptr, b.ptr), tb=stack_info())
 
 
 def ti_max(*args):
diff --git a/taichi/backends/opengl/codegen_opengl.cpp b/taichi/backends/opengl/codegen_opengl.cpp
index dc651e11db66a..0c24622ec5a0d 100644
--- a/taichi/backends/opengl/codegen_opengl.cpp
+++ b/taichi/backends/opengl/codegen_opengl.cpp
@@ -169,6 +169,12 @@ class KernelGen : public IRVisitor {
       );
     }  // }}}
 
+    if (used.fast_pow) {
+      kernel_header += (
+#include "taichi/backends/opengl/shaders/fast_pow.glsl.h"
+      );
+    }
+
     line_appender_header_.append_raw(kernel_header);
 
     int threads_per_group = opengl_get_threads_per_group();
@@ -407,6 +413,16 @@ class KernelGen : public IRVisitor {
         emit("{} {} = atan({}, {});", dt_name, bin_name, lhs_name, rhs_name);
       }
       return;
+    } else if (bin->op_type == BinaryOpType::pow
+        && is_integral(bin->rhs->element_type())) {
+      // The GLSL `pow` is not precise for `int` operands, e.g. `pow(5, 3)` yields 124,
+      // so integer exponents are routed through a dedicated fast_pow_* helper instead.
+      // Discussion: https://github.com/taichi-dev/taichi/pull/943#issuecomment-626354902
+      emit("{} {} = {}(fast_pow_{}({}, {}));", dt_name, bin_name, dt_name,
+          data_type_short_name(bin->lhs->element_type()),
+           lhs_name, rhs_name);
+      used.fast_pow = true;
+      return;
     }
     const auto binop = binary_op_type_symbol(bin->op_type);
     if (is_opengl_binary_op_infix(bin->op_type)) {
diff --git a/taichi/backends/opengl/opengl_kernel_util.h b/taichi/backends/opengl/opengl_kernel_util.h
index b9388014b8b76..ee9f18e746438 100644
--- a/taichi/backends/opengl/opengl_kernel_util.h
+++ b/taichi/backends/opengl/opengl_kernel_util.h
@@ -20,6 +20,7 @@ struct UsedFeature {
   bool simulated_atomic_float{false};
   bool int64{false};
   bool global_temp{false};
+  bool fast_pow{false};
 };
 
 struct StructCompiledResult {
diff --git a/taichi/backends/opengl/shaders/fast_pow.glsl.h b/taichi/backends/opengl/shaders/fast_pow.glsl.h
new file mode 100644
index 0000000000000..051ad939ce6eb
--- /dev/null
+++ b/taichi/backends/opengl/shaders/fast_pow.glsl.h
@@ -0,0 +1,38 @@
+// vim: ft=glsl
+// clang-format off
+#include "taichi/util/macros.h"
+STR(
+int fast_pow_i32(int x, int y)
+{
+  if (y > 512)
+    return int(pow(x, y));
+
+  bool neg = y < 0;
+  y = abs(y);
+  int ret = 1;
+  while (y != 0) {
+    if ((y & 1) != 0)
+      ret *= x;
+    x *= x;
+    y >>= 1;
+  }
+  return neg ? 1 / ret : ret;
+}
+
+float fast_pow_f32(float x, int y)
+{
+  if (y > 512)
+    return pow(x, y);
+
+  bool neg = y < 0;
+  y = abs(y);
+  float ret = 1.0;
+  while (y != 0) {
+    if ((y & 1) != 0)
+      ret *= x;
+    x *= x;
+    y >>= 1;
+  }
+  return neg ? 1.0 / ret : ret;
+}
+)
diff --git a/tests/python/test_element_wise.py b/tests/python/test_element_wise.py
index 762bbf36322e0..57dcd88978994 100644
--- a/tests/python/test_element_wise.py
+++ b/tests/python/test_element_wise.py
@@ -1,6 +1,7 @@
 import taichi as ti
 from taichi import approx
 from random import random, randint, seed
+import operator as ops
 import math
 
 
@@ -12,7 +13,9 @@ def rand(dtype):
     if ti.core.is_integral(dtype):
         return randint(1, 5)
     else:
-        return float(randint(1, 5)) / 5
+        # Prevent integer operands in pow and floordiv in GLSL
+        # Discussion: https://github.com/taichi-dev/taichi/pull/943#discussion_r423177941
+        return float(randint(1, 5)) / 5 - 0.01
 
 
 @ti.host_arch_only
@@ -82,20 +85,56 @@ def func():
             assert c[None][i, j] == approx(expected)
 
 
-def test_matrix_element_wise_binary():
+def test_matrix_element_wise_unary_infix():
+    seed(5156)
+    for n, m in [(5, 4), (3, 1)]:
+        _test_matrix_element_wise_unary(ti.f32, n, m, ops.neg, ops.neg)
+        _test_matrix_element_wise_unary(ti.i32, n, m, ops.neg, ops.neg)
+
+
+def test_matrix_element_wise_binary_infix_f32():
+    seed(4399)
+    for n, m in [(5, 4), (3, 1)]:
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.add, ops.add)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.sub, ops.sub)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.mul, ops.mul)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.mod, ops.mod)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.pow, ops.pow)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.truediv, ops.truediv)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ops.floordiv, ops.floordiv)
+
+
+def test_matrix_element_wise_binary_infix_i32():
+    seed(6174)
+    for n, m in [(5, 4), (3, 1)]:
+        _test_matrix_element_wise_binary(ti.i32, n, m, ops.add, ops.add)
+        _test_matrix_element_wise_binary(ti.i32, n, m, ops.sub, ops.sub)
+        _test_matrix_element_wise_binary(ti.i32, n, m, ops.mul, ops.mul)
+        _test_matrix_element_wise_binary(ti.i32, n, m, ops.mod, ops.mod)
+        _test_matrix_element_wise_binary(ti.i32, n, m, ops.pow, ops.pow)
+        # TODO: add pow(f32, i32)
+
+
+def test_matrix_element_wise_binary_f32():
     seed(666)
     for n, m in [(5, 4), (3, 1)]:
         _test_matrix_element_wise_binary(ti.f32, n, m, ti.atan2, math.atan2)
         _test_matrix_element_wise_binary(ti.f32, n, m, ti.min, min)
-        _test_matrix_element_wise_binary(ti.i32, n, m, ti.min, min)
         _test_matrix_element_wise_binary(ti.f32, n, m, ti.max, max)
+        _test_matrix_element_wise_binary(ti.f32, n, m, ti.pow, pow)
+
+
+def test_matrix_element_wise_binary_i32():
+    seed(985)
+    for n, m in [(5, 4), (3, 1)]:
+        _test_matrix_element_wise_binary(ti.i32, n, m, ti.min, min)
         _test_matrix_element_wise_binary(ti.i32, n, m, ti.max, max)
-        _test_matrix_element_wise_binary(ti.f32, n, m, pow, pow)
-        _test_matrix_element_wise_binary(ti.i32, n, m, pow, pow)
+        _test_matrix_element_wise_binary(ti.i32, n, m, ti.pow, pow)
         _test_matrix_element_wise_binary(ti.i32, n, m, ti.raw_mod, _c_mod)
+        # TODO: add ti.raw_div
 
 
-def test_matrix_element_wise_unary():
+def test_matrix_element_wise_unary_1():
     seed(233)
     for n, m in [(5, 4), (3, 1)]:
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.sin, math.sin)
@@ -105,6 +144,11 @@ def test_matrix_element_wise_unary():
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.acos, math.acos)
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.tanh, math.tanh)
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.sqrt, math.sqrt)
+
+
+def test_matrix_element_wise_unary_2():
+    seed(211)
+    for n, m in [(5, 4), (3, 1)]:
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.exp, math.exp)
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.log, math.log)
         _test_matrix_element_wise_unary(ti.f32, n, m, ti.ceil, math.ceil)