Devel #1782 (Merged)

15 commits merged on Oct 12, 2020
6 changes: 3 additions & 3 deletions .github/workflows/pnl-ci.yml
@@ -23,23 +23,23 @@ jobs:
 fetch-depth: 10

 - name: Linux wheels cache
-uses: actions/cache@v2.1.1
+uses: actions/cache@v2.1.2
 if: startsWith(runner.os, 'Linux')
 with:
 path: ~/.cache/pip/wheels
 key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ github.sha }}
 restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels

 - name: MacOS wheels cache
-uses: actions/cache@v2.1.1
+uses: actions/cache@v2.1.2
 if: startsWith(runner.os, 'macOS')
 with:
 path: ~/Library/Caches/pip/wheels
 key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ github.sha }}
 restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels

 - name: Windows wheels cache
-uses: actions/cache@v2.1.1
+uses: actions/cache@v2.1.2
 if: startsWith(runner.os, 'Windows')
 with:
 path: ~\AppData\Local\pip\Cache\wheels
4 changes: 2 additions & 2 deletions .travis.yml
@@ -67,9 +67,9 @@ before_install:
if [ "$TRAVIS_CPU_ARCH" != "amd64" ]; then
# There are a lot fewer wheels distributed for non-x86 architectures.
# We end up building a lot of them locally, install dev packages
export EXTRA_PKGS="build-essential gfortran llvm-9-dev libfreetype6-dev libjpeg-dev liblapack-dev zlib1g-dev"
export EXTRA_PKGS="build-essential gfortran llvm-10-dev libfreetype6-dev libjpeg-dev liblapack-dev zlib1g-dev"
# Export LLVM_CONFIG for llvmlite
export LLVM_CONFIG=llvm-config-9
export LLVM_CONFIG=llvm-config-10
# Disable coverage
export RUN_COV=""
fi
2 changes: 1 addition & 1 deletion dev_requirements.txt
@@ -1,6 +1,6 @@
 jupyter<=1.0.0
 psyneulink-sphinx-theme<=1.2.1.7
-pytest<6.1.1
+pytest<6.1.2
 pytest-benchmark<=3.2.3
 pytest-cov<=2.10.1
 pytest-helpers-namespace<=2019.1.8
2 changes: 1 addition & 1 deletion psyneulink/core/components/mechanisms/mechanism.py
@@ -3003,7 +3003,7 @@ def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in,
 for scale in [TimeScale.TIME_STEP, TimeScale.PASS, TimeScale.TRIAL, TimeScale.RUN]:
 num_exec_time_ptr = builder.gep(num_executions_ptr, [ctx.int32_ty(0), ctx.int32_ty(scale.value)])
 new_val = builder.load(num_exec_time_ptr)
-new_val = builder.add(new_val, ctx.int32_ty(1))
+new_val = builder.add(new_val, new_val.type(1))
 builder.store(new_val, num_exec_time_ptr)

 builder = self._gen_llvm_output_ports(ctx, builder, value, params, state, arg_in, arg_out)
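Review note: switching the increment constant from ctx.int32_ty(1) to new_val.type(1) derives the constant from the loaded value's own integer type, so the counter width can change without touching this code. A minimal llvmlite sketch of that pattern (standalone illustration, not PsyNeuLink code; the 64-bit width is an assumption for demonstration):

from llvmlite import ir

mod = ir.Module(name="increment_demo")
counter_ty = ir.IntType(64)  # pretend the counter is 64-bit
fn = ir.Function(mod, ir.FunctionType(ir.VoidType(), [counter_ty.as_pointer()]), name="bump")
builder = ir.IRBuilder(fn.append_basic_block("entry"))

ptr, = fn.args
val = builder.load(ptr)
val = builder.add(val, val.type(1))  # constant automatically matches val's width
builder.store(val, ptr)
builder.ret_void()
print(mod)  # emitted IR contains 'add i64 ..., 1'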
4 changes: 2 additions & 2 deletions psyneulink/core/llvm/__init__.py
@@ -88,12 +88,12 @@ def _cuda_kernel(self):
 self.__cuda_kernel = _ptx_engine.get_kernel(self.name)
 return self.__cuda_kernel

-def cuda_call(self, *args, threads=1, block_size=32):
+def cuda_call(self, *args, threads=1, block_size=128):
 grid = ((threads + block_size - 1) // block_size, 1)
 self._cuda_kernel(*args, np.int32(threads),
 block=(block_size, 1, 1), grid=grid)

-def cuda_wrap_call(self, *args, threads=1, block_size=32):
+def cuda_wrap_call(self, *args, threads=1, block_size=128):
 wrap_args = (jit_engine.pycuda.driver.InOut(a) if isinstance(a, np.ndarray) else a for a in args)
 self.cuda_call(*wrap_args, threads=threads, block_size=block_size)
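Review note: raising the default block_size from 32 to 128 only changes the CUDA launch shape; the ceiling division above still covers every element, and the kernel also receives np.int32(threads), presumably so the excess threads in the last block can bail out. A quick standalone check of that grid arithmetic (assumption: the kernel does its own bounds check):

def grid_for(threads, block_size=128):
    # mirrors: grid = ((threads + block_size - 1) // block_size, 1)
    return ((threads + block_size - 1) // block_size, 1)

assert grid_for(1) == (1, 1)      # one partially filled block
assert grid_for(128) == (1, 1)    # exactly one full block
assert grid_for(129) == (2, 1)    # spills into a second block
assert grid_for(1000) == (8, 1)   # 8 * 128 = 1024 >= 1000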
12 changes: 8 additions & 4 deletions psyneulink/core/llvm/builder_context.py
@@ -431,12 +431,16 @@ def _convert_llvm_ir_to_ctype(t: ir.Type):
 if type_t is ir.VoidType:
 return None
 elif type_t is ir.IntType:
-if t.width == 32:
-return ctypes.c_int
+if t.width == 8:
+return ctypes.c_int8
+elif t.width == 16:
+return ctypes.c_int16
+elif t.width == 32:
+return ctypes.c_int32
 elif t.width == 64:
-return ctypes.c_longlong
+return ctypes.c_int64
 else:
-assert False, "Integer type too big!"
+assert False, "Unknown integer type: {}".format(type_t)
 elif type_t is ir.DoubleType:
 return ctypes.c_double
 elif type_t is ir.FloatType:
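Review note: the widened branch now covers 8-, 16-, 32- and 64-bit integers with fixed-width ctypes aliases instead of the platform-flavored c_int/c_longlong. A standalone sanity check of that mapping (not part of the PR):

import ctypes
from llvmlite import ir

width_to_ctype = {8: ctypes.c_int8, 16: ctypes.c_int16,
                  32: ctypes.c_int32, 64: ctypes.c_int64}
for width, cty in width_to_ctype.items():
    # each fixed-width ctypes alias matches the LLVM integer width exactly
    assert ctypes.sizeof(cty) * 8 == ir.IntType(width).width == width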
2 changes: 1 addition & 1 deletion psyneulink/core/llvm/codegen.py
@@ -916,7 +916,7 @@ def gen_composition_run(ctx, composition, *, tags:frozenset):
 node_state = builder.gep(state, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(idx)])
 num_executions_ptr = helpers.get_state_ptr(builder, node, node_state, "num_executions")
 num_exec_time_ptr = builder.gep(num_executions_ptr, [ctx.int32_ty(0), ctx.int32_ty(TimeScale.RUN.value)])
-builder.store(ctx.int32_ty(0), num_exec_time_ptr)
+builder.store(num_exec_time_ptr.type.pointee(0), num_exec_time_ptr)

 # Call execution
 exec_tags = tags.difference({"run"})
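Review note: same pattern as the mechanism.py change above, but for resetting the counter: the zero constant is now built from the pointer's pointee type rather than hard-coded as int32. A tiny llvmlite illustration (my own example, not PR code):

from llvmlite import ir

ptr_ty = ir.IntType(64).as_pointer()
zero = ptr_ty.pointee(0)      # an ir.Constant of whatever type the pointer targets
assert zero.type.width == 64  # follows the pointee, no assumption about int32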
4 changes: 3 additions & 1 deletion psyneulink/core/llvm/jit_engine.py
@@ -24,6 +24,8 @@
 if pycuda.driver.get_version()[0] > 5:
 from pycuda import autoinit as pycuda_default
 import pycuda.compiler
+assert pycuda_default.context is not None
+pycuda_default.context.set_cache_config(pycuda.driver.func_cache.PREFER_L1)
 ptx_enabled = True
 else:
 raise UserWarning("CUDA driver too old (need 6+): " + str(pycuda.driver.get_version()))
@@ -316,5 +318,5 @@ def get_kernel(self, name):
 wrapper_mod = _gen_cuda_kernel_wrapper_module(function)
 self.compile_modules([wrapper_mod], set())
 kernel = self._engine._find_kernel(name + "_cuda_kernel")
-
+kernel.set_cache_config(pycuda.driver.func_cache.PREFER_L1)
 return kernel
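Review note: the two new set_cache_config calls ask the CUDA driver to prefer L1 cache over shared memory, once for the default context and once per generated kernel. A hedged standalone sketch of the same calls (requires pycuda, a CUDA-capable device, and nvcc; the trivial "noop" kernel is mine, only the API calls mirror the diff):

import pycuda.autoinit                    # creates the default context
import pycuda.driver as drv
from pycuda.compiler import SourceModule

# Context-wide preference, as done right after the pycuda import above
pycuda.autoinit.context.set_cache_config(drv.func_cache.PREFER_L1)

# Per-kernel preference, as done in get_kernel()
fn = SourceModule("__global__ void noop() {}").get_function("noop")
fn.set_cache_config(drv.func_cache.PREFER_L1)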
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
 autograd<=1.3
 dill<=0.32
 elfi<=0.7.6
-graphviz<=0.14.1
+graphviz<0.14.3
 grpcio<=1.31.0
 grpcio-tools<=1.31.0
 llvmlite<=0.34
42 changes: 42 additions & 0 deletions tests/llvm/test_custom_func.py
@@ -64,3 +64,45 @@ def test_fixed_dimensions__pnl_builtin_vxm(mode):
binf2.cuda_wrap_call(vector, matrix, new_res)

assert np.array_equal(orig_res, new_res)


@pytest.mark.llvm
@pytest.mark.parametrize('mode', ['CPU',
                                  pytest.param('PTX', marks=pytest.mark.cuda)])
@pytest.mark.parametrize('val', [np.int8(0x7e),
                                 np.int16(0x7eec),
                                 np.int32(0x7eedbeee),
                                 np.int64(0x7eedcafedeadbeee)
                                 ], ids=lambda x: str(x.dtype))
def test_integer_broadcast(mode, val):
    custom_name = None
    with pnlvm.LLVMBuilderContext() as ctx:
        custom_name = ctx.get_unique_name("broadcast")
        int_ty = ctx.convert_python_struct_to_llvm_ir(val)
        int_array_ty = ir.ArrayType(int_ty, 8)
        func_ty = ir.FunctionType(ir.VoidType(), (int_ty.as_pointer(),
                                                  int_array_ty.as_pointer()))
        function = ir.Function(ctx.module, func_ty, name=custom_name)

        i, o = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)
        ival = builder.load(i)
        ival = builder.add(ival, ival.type(1))
        with pnlvm.helpers.array_ptr_loop(builder, o, "broadcast") as (b, i):
            out_ptr = builder.gep(o, [ctx.int32_ty(0), i])
            builder.store(ival, out_ptr)
        builder.ret_void()

    binf = pnlvm.LLVMBinaryFunction.get(custom_name)
    res = np.zeros(8, dtype=val.dtype)

    if mode == 'CPU':
        ct_res = np.ctypeslib.as_ctypes(res)
        ct_in = np.ctypeslib.as_ctypes(val)

        binf(ctypes.byref(ct_in), ctypes.byref(ct_res))
    else:
        binf.cuda_wrap_call(np.asarray(val), res)

    assert all(res == np.broadcast_to(val + 1, 8))
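Review note on the chosen test values: every constant tops out at 0x7e.. while the signed maximum for each width is 0x7f.., so the kernel's val + 1 cannot overflow. A quick standalone check (not part of the PR):

import numpy as np

for v in (np.int8(0x7e), np.int16(0x7eec),
          np.int32(0x7eedbeee), np.int64(0x7eedcafedeadbeee)):
    assert int(v) + 1 <= np.iinfo(v.dtype).max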
17 changes: 17 additions & 0 deletions tests/mechanisms/test_ddm_mechanism.py
@@ -793,3 +793,20 @@ def test_sequence_of_DDM_mechs_in_Composition_Pathway():
# if you do not specify, assert_allcose will use a relative tolerance of 1e-07,
# which WILL FAIL unless you gather higher precision values to use as reference
np.testing.assert_allclose(val, expected, atol=1e-08, err_msg='Failed on expected_output[{0}]'.format(i))


@pytest.mark.mechanism
@pytest.mark.ddm_mechanism
@pytest.mark.parametrize('mode', ['Python',
                                  pytest.param('LLVM', marks=pytest.mark.llvm),
                                  pytest.param('LLVMExec', marks=pytest.mark.llvm),
                                  pytest.param('LLVMRun', marks=pytest.mark.llvm),
                                  pytest.param('PTXExec', marks=[pytest.mark.llvm, pytest.mark.cuda]),
                                  pytest.param('PTXRun', marks=[pytest.mark.llvm, pytest.mark.cuda])])
def test_DDMMechanism_LCA_equivalent(mode):
    ddm = DDM(default_variable=[0], function=DriftDiffusionIntegrator(rate=1, time_step_size=0.1))
    comp2 = Composition()
    comp2.add_node(ddm)
    result2 = comp2.run(inputs={ddm:[1]}, bin_execute=mode)
    assert np.allclose(np.asfarray(result2[0]), [0.1])
    assert np.allclose(np.asfarray(result2[1]), [0.1])
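Review note on the expected values (my reading of the parameters, not stated in the PR): with rate=1 and time_step_size=0.1, one integration step moves the decision variable from 0 to 0.1, and the accumulated time appears to advance by the same 0.1 per step, which is what the two assertions check. A trivial arithmetic check of the first assertion:

import numpy as np

rate, time_step_size, stimulus, x0 = 1.0, 0.1, 1.0, 0.0
# one Euler step of the integrator (noise = 0): x_new = x0 + rate * stimulus * time_step_size
assert np.isclose(x0 + rate * stimulus * time_step_size, 0.1)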
17 changes: 17 additions & 0 deletions tests/mechanisms/test_lca.py
@@ -285,6 +285,23 @@ def test_equivalance_of_threshold_and_termination_specifications_max_vs_next(sel
# result = comp.run(inputs={lca:[1,0]})
# assert np.allclose(result, [[0.71463572, 0.28536428]])

    @pytest.mark.mechanism
    @pytest.mark.lca_mechanism
    @pytest.mark.parametrize('mode', ['Python',
                                      pytest.param('LLVM', marks=pytest.mark.llvm),
                                      pytest.param('LLVMExec', marks=pytest.mark.llvm),
                                      pytest.param('LLVMRun', marks=pytest.mark.llvm),
                                      pytest.param('PTXExec', marks=[pytest.mark.llvm, pytest.mark.cuda]),
                                      pytest.param('PTXRun', marks=[pytest.mark.llvm, pytest.mark.cuda])])
    def test_LCAMechanism_DDM_equivalent(self, mode):
        lca = LCAMechanism(size=2, leak=0., threshold=1, auto=0, hetero=0,
                           initial_value=[0, 0], execute_until_finished=False)
        comp1 = Composition()
        comp1.add_node(lca)
        result1 = comp1.run(inputs={lca:[1, -1]}, bin_execute=mode)
        assert np.allclose(result1, [[0.52497918747894, 0.47502081252106]],)


class TestLCAReset:

def test_reset_run(self):
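Review note on where the expected output of the new LCAMechanism test above comes from (my derivation, not spelled out in the PR): with leak=0, auto=0, hetero=0 and inputs [1, -1], one integration step of size 0.1 leaves the two units at +0.1 and -0.1, and the mechanism's logistic output function maps those to 1/(1+e^-0.1) and 1/(1+e^0.1). A standalone check:

import numpy as np

x = np.array([0.1, -0.1])
print(1.0 / (1.0 + np.exp(-x)))  # [0.52497919 0.47502081]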