
Commit

Merge pull request #1782 from PrincetonUniversity/devel
Devel
dillontsmith authored Oct 12, 2020
2 parents e415857 + 78a0712 commit 472a2d0
Showing 15 changed files with 172 additions and 173 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/pnl-ci.yml
@@ -23,23 +23,23 @@ jobs:
fetch-depth: 10

- name: Linux wheels cache
- uses: actions/cache@v2.1.1
+ uses: actions/cache@v2.1.2
if: startsWith(runner.os, 'Linux')
with:
path: ~/.cache/pip/wheels
key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ github.sha }}
restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels

- name: MacOS wheels cache
- uses: actions/cache@v2.1.1
+ uses: actions/cache@v2.1.2
if: startsWith(runner.os, 'macOS')
with:
path: ~/Library/Caches/pip/wheels
key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ github.sha }}
restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels

- name: Windows wheels cache
- uses: actions/cache@v2.1.1
+ uses: actions/cache@v2.1.2
if: startsWith(runner.os, 'Windows')
with:
path: ~\AppData\Local\pip\Cache\wheels
4 changes: 2 additions & 2 deletions .travis.yml
@@ -67,9 +67,9 @@ before_install:
if [ "$TRAVIS_CPU_ARCH" != "amd64" ]; then
# There are a lot fewer wheels distributed for non-x86 architectures.
# We end up building a lot of them locally, install dev packages
- export EXTRA_PKGS="build-essential gfortran llvm-9-dev libfreetype6-dev libjpeg-dev liblapack-dev zlib1g-dev"
+ export EXTRA_PKGS="build-essential gfortran llvm-10-dev libfreetype6-dev libjpeg-dev liblapack-dev zlib1g-dev"
# Export LLVM_CONFIG for llvmlite
- export LLVM_CONFIG=llvm-config-9
+ export LLVM_CONFIG=llvm-config-10
# Disable coverage
export RUN_COV=""
fi
2 changes: 1 addition & 1 deletion dev_requirements.txt
@@ -1,6 +1,6 @@
jupyter<=1.0.0
psyneulink-sphinx-theme<=1.2.1.7
- pytest<6.1.1
+ pytest<6.1.2
pytest-benchmark<=3.2.3
pytest-cov<=2.10.1
pytest-helpers-namespace<=2019.1.8
2 changes: 1 addition & 1 deletion psyneulink/core/components/mechanisms/mechanism.py
@@ -3003,7 +3003,7 @@ def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in,
for scale in [TimeScale.TIME_STEP, TimeScale.PASS, TimeScale.TRIAL, TimeScale.RUN]:
num_exec_time_ptr = builder.gep(num_executions_ptr, [ctx.int32_ty(0), ctx.int32_ty(scale.value)])
new_val = builder.load(num_exec_time_ptr)
- new_val = builder.add(new_val, ctx.int32_ty(1))
+ new_val = builder.add(new_val, new_val.type(1))
builder.store(new_val, num_exec_time_ptr)

builder = self._gen_llvm_output_ports(ctx, builder, value, params, state, arg_in, arg_out)
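Note on the change above: building the increment as new_val.type(1) instead of ctx.int32_ty(1) keeps the constant the same integer width as the loaded execution counter, whatever that width happens to be. A minimal standalone llvmlite sketch of the idiom (the increment function and module here are illustrative, not PsyNeuLink code):

from llvmlite import ir

module = ir.Module(name="increment_demo")
int64 = ir.IntType(64)
fn = ir.Function(module, ir.FunctionType(int64, (int64.as_pointer(),)),
                 name="increment")
builder = ir.IRBuilder(fn.append_basic_block("entry"))

counter_ptr, = fn.args
val = builder.load(counter_ptr)
# Calling a type object builds a constant of that type, so val.type(1)
# always matches the width of val (here i64) and the add never mixes widths.
val = builder.add(val, val.type(1))
builder.store(val, counter_ptr)
builder.ret(val)
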
4 changes: 2 additions & 2 deletions psyneulink/core/llvm/__init__.py
@@ -88,12 +88,12 @@ def _cuda_kernel(self):
self.__cuda_kernel = _ptx_engine.get_kernel(self.name)
return self.__cuda_kernel

- def cuda_call(self, *args, threads=1, block_size=32):
+ def cuda_call(self, *args, threads=1, block_size=128):
grid = ((threads + block_size - 1) // block_size, 1)
self._cuda_kernel(*args, np.int32(threads),
block=(block_size, 1, 1), grid=grid)

- def cuda_wrap_call(self, *args, threads=1, block_size=32):
+ def cuda_wrap_call(self, *args, threads=1, block_size=128):
wrap_args = (jit_engine.pycuda.driver.InOut(a) if isinstance(a, np.ndarray) else a for a in args)
self.cuda_call(*wrap_args, threads=threads, block_size=block_size)

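Both entry points above now default to 128-thread blocks; the grid is the usual ceiling division of the requested thread count by the block size. A small sketch of that launch-geometry calculation (the cuda_grid helper name is illustrative only):

def cuda_grid(threads, block_size=128):
    # Enough blocks of block_size threads to cover `threads`, 1-wide in y.
    return ((threads + block_size - 1) // block_size, 1)

assert cuda_grid(1) == (1, 1)
assert cuda_grid(128) == (1, 1)
assert cuda_grid(129) == (2, 1)
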
12 changes: 8 additions & 4 deletions psyneulink/core/llvm/builder_context.py
@@ -431,12 +431,16 @@ def _convert_llvm_ir_to_ctype(t: ir.Type):
if type_t is ir.VoidType:
return None
elif type_t is ir.IntType:
- if t.width == 32:
- return ctypes.c_int
+ if t.width == 8:
+ return ctypes.c_int8
+ elif t.width == 16:
+ return ctypes.c_int16
+ elif t.width == 32:
+ return ctypes.c_int32
elif t.width == 64:
- return ctypes.c_longlong
+ return ctypes.c_int64
else:
- assert False, "Integer type too big!"
+ assert False, "Unknown integer type: {}".format(type_t)
elif type_t is ir.DoubleType:
return ctypes.c_double
elif type_t is ir.FloatType:
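The ctypes conversion above now handles 8-, 16-, 32- and 64-bit integers explicitly rather than only the two widths it previously knew about. A rough standalone equivalent of the new mapping (the int_type_to_ctype helper and its lookup table are illustrative, not part of the module's API):

import ctypes
from llvmlite import ir

_INT_CTYPES = {8: ctypes.c_int8, 16: ctypes.c_int16,
               32: ctypes.c_int32, 64: ctypes.c_int64}

def int_type_to_ctype(t):
    # Any width outside the table is still a hard error, as in the diff above.
    assert t.width in _INT_CTYPES, "Unknown integer type: {}".format(t)
    return _INT_CTYPES[t.width]

assert int_type_to_ctype(ir.IntType(16)) is ctypes.c_int16
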
2 changes: 1 addition & 1 deletion psyneulink/core/llvm/codegen.py
@@ -916,7 +916,7 @@ def gen_composition_run(ctx, composition, *, tags:frozenset):
node_state = builder.gep(state, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(idx)])
num_executions_ptr = helpers.get_state_ptr(builder, node, node_state, "num_executions")
num_exec_time_ptr = builder.gep(num_executions_ptr, [ctx.int32_ty(0), ctx.int32_ty(TimeScale.RUN.value)])
- builder.store(ctx.int32_ty(0), num_exec_time_ptr)
+ builder.store(num_exec_time_ptr.type.pointee(0), num_exec_time_ptr)

# Call execution
exec_tags = tags.difference({"run"})
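As in the mechanism change earlier, the zero stored into the per-run execution counter is now derived from the pointer's own element type instead of being hard-coded as a 32-bit constant. A tiny llvmlite illustration, assuming for the example that the counter slot is an i64:

from llvmlite import ir

ptr_ty = ir.IntType(64).as_pointer()
# ptr_ty.pointee is ir.IntType(64); calling it yields a matching constant,
# equivalent to ir.Constant(ir.IntType(64), 0).
zero = ptr_ty.pointee(0)
assert zero.type.width == 64
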
4 changes: 3 additions & 1 deletion psyneulink/core/llvm/jit_engine.py
@@ -24,6 +24,8 @@
if pycuda.driver.get_version()[0] > 5:
from pycuda import autoinit as pycuda_default
import pycuda.compiler
+ assert pycuda_default.context is not None
+ pycuda_default.context.set_cache_config(pycuda.driver.func_cache.PREFER_L1)
ptx_enabled = True
else:
raise UserWarning("CUDA driver too old (need 6+): " + str(pycuda.driver.get_version()))
@@ -316,5 +318,5 @@ def get_kernel(self, name):
wrapper_mod = _gen_cuda_kernel_wrapper_module(function)
self.compile_modules([wrapper_mod], set())
kernel = self._engine._find_kernel(name + "_cuda_kernel")

+ kernel.set_cache_config(pycuda.driver.func_cache.PREFER_L1)
return kernel
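Both the default CUDA context and every generated wrapper kernel now ask the driver to prefer L1 cache over shared memory. Outside PsyNeuLink the same two pycuda calls look roughly like this (requires a working CUDA/pycuda install; the no-op kernel exists only for the example):

import pycuda.autoinit as pycuda_default   # creates the default context
import pycuda.driver as drv
from pycuda.compiler import SourceModule

# Context-wide preference, as set at import time in the first hunk above.
pycuda_default.context.set_cache_config(drv.func_cache.PREFER_L1)

# Per-kernel preference, as applied to each generated wrapper kernel.
kernel = SourceModule("__global__ void noop() {}").get_function("noop")
kernel.set_cache_config(drv.func_cache.PREFER_L1)
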
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
autograd<=1.3
dill<=0.32
elfi<=0.7.6
- graphviz<=0.14.1
+ graphviz<0.14.3
grpcio<=1.31.0
grpcio-tools<=1.31.0
llvmlite<=0.34
42 changes: 42 additions & 0 deletions tests/llvm/test_custom_func.py
@@ -64,3 +64,45 @@ def test_fixed_dimensions__pnl_builtin_vxm(mode):
binf2.cuda_wrap_call(vector, matrix, new_res)

assert np.array_equal(orig_res, new_res)


@pytest.mark.llvm
@pytest.mark.parametrize('mode', ['CPU',
pytest.param('PTX', marks=pytest.mark.cuda)])
@pytest.mark.parametrize('val', [np.int8(0x7e),
np.int16(0x7eec),
np.int32(0x7eedbeee),
np.int64(0x7eedcafedeadbeee)
], ids=lambda x: str(x.dtype))
def test_integer_broadcast(mode, val):
custom_name = None
with pnlvm.LLVMBuilderContext() as ctx:
custom_name = ctx.get_unique_name("broadcast")
int_ty = ctx.convert_python_struct_to_llvm_ir(val)
int_array_ty = ir.ArrayType(int_ty, 8)
func_ty = ir.FunctionType(ir.VoidType(), (int_ty.as_pointer(),
int_array_ty.as_pointer()))
function = ir.Function(ctx.module, func_ty, name=custom_name)

i, o = function.args
block = function.append_basic_block(name="entry")
builder = ir.IRBuilder(block)
ival = builder.load(i)
ival = builder.add(ival, ival.type(1))
with pnlvm.helpers.array_ptr_loop(builder, o, "broadcast") as (b, i):
out_ptr = builder.gep(o, [ctx.int32_ty(0), i])
builder.store(ival, out_ptr)
builder.ret_void()

binf = pnlvm.LLVMBinaryFunction.get(custom_name)
res = np.zeros(8, dtype=val.dtype)

if mode == 'CPU':
ct_res = np.ctypeslib.as_ctypes(res)
ct_in = np.ctypeslib.as_ctypes(val)

binf(ctypes.byref(ct_in), ctypes.byref(ct_res))
else:
binf.cuda_wrap_call(np.asarray(val), res)

assert all(res == np.broadcast_to(val + 1, 8))
17 changes: 17 additions & 0 deletions tests/mechanisms/test_ddm_mechanism.py
@@ -793,3 +793,20 @@ def test_sequence_of_DDM_mechs_in_Composition_Pathway():
# if you do not specify, assert_allcose will use a relative tolerance of 1e-07,
# which WILL FAIL unless you gather higher precision values to use as reference
np.testing.assert_allclose(val, expected, atol=1e-08, err_msg='Failed on expected_output[{0}]'.format(i))


@pytest.mark.mechanism
@pytest.mark.ddm_mechanism
@pytest.mark.parametrize('mode', ['Python',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.param('LLVMExec', marks=pytest.mark.llvm),
pytest.param('LLVMRun', marks=pytest.mark.llvm),
pytest.param('PTXExec', marks=[pytest.mark.llvm, pytest.mark.cuda]),
pytest.param('PTXRun', marks=[pytest.mark.llvm, pytest.mark.cuda])])
def test_DDMMechanism_LCA_equivalent(mode):
ddm = DDM(default_variable=[0], function=DriftDiffusionIntegrator(rate=1, time_step_size=0.1))
comp2 = Composition()
comp2.add_node(ddm)
result2 = comp2.run(inputs={ddm:[1]}, bin_execute=mode)
assert np.allclose(np.asfarray(result2[0]), [0.1])
assert np.allclose(np.asfarray(result2[1]), [0.1])
17 changes: 17 additions & 0 deletions tests/mechanisms/test_lca.py
@@ -285,6 +285,23 @@ def test_equivalance_of_threshold_and_termination_specifications_max_vs_next(sel
# result = comp.run(inputs={lca:[1,0]})
# assert np.allclose(result, [[0.71463572, 0.28536428]])

@pytest.mark.mechanism
@pytest.mark.lca_mechanism
@pytest.mark.parametrize('mode', ['Python',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.param('LLVMExec', marks=pytest.mark.llvm),
pytest.param('LLVMRun', marks=pytest.mark.llvm),
pytest.param('PTXExec', marks=[pytest.mark.llvm, pytest.mark.cuda]),
pytest.param('PTXRun', marks=[pytest.mark.llvm, pytest.mark.cuda])])
def test_LCAMechanism_DDM_equivalent(self, mode):
lca = LCAMechanism(size=2, leak=0., threshold=1, auto=0, hetero=0,
initial_value=[0, 0], execute_until_finished=False)
comp1 = Composition()
comp1.add_node(lca)
result1 = comp1.run(inputs={lca:[1, -1]}, bin_execute=mode)
assert np.allclose(result1, [[0.52497918747894, 0.47502081252106]],)


class TestLCAReset:

def test_reset_run(self):
Expand Down

