Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove legacy random code #351

Merged
merged 25 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6b72918
update als docs for rng
mdekstrand Dec 13, 2023
a2edf68
drop use of deprecated util.rng
mdekstrand Dec 13, 2023
69ea2fe
use RNG for single-partition and over-sized samples (closes #327)
mdekstrand Dec 13, 2023
292ce6d
fix conftest for numpy_rng
mdekstrand Dec 13, 2023
56dd47c
switch als to seedbank
mdekstrand Dec 13, 2023
56c8ba3
drop unused import
mdekstrand Dec 13, 2023
d981b8d
update funksvd to seedbank
mdekstrand Dec 13, 2023
c320170
drop unused import
mdekstrand Dec 13, 2023
b138e65
update ALS tests to no longer use util.rng
mdekstrand Dec 13, 2023
5cba770
use initialize for conftest
mdekstrand Dec 13, 2023
2e3cc27
use seedbank in parallel worker init
mdekstrand Dec 13, 2023
f122eb4
conftest: clean up unused imports
mdekstrand Dec 13, 2023
47d380f
drop codecarbon
mdekstrand Dec 13, 2023
a43a85c
clean up test warnings
mdekstrand Dec 13, 2023
d86181c
clean up util random imports
mdekstrand Dec 13, 2023
c1cb84a
clean out unused random functions
mdekstrand Dec 13, 2023
5ce53b8
fix parallel test imports
mdekstrand Dec 13, 2023
88d0f86
drop util tests in favor of seedbank
mdekstrand Dec 13, 2023
2b67329
fix test parallel import
mdekstrand Dec 13, 2023
31a1c83
clean up legacy RNG usage
mdekstrand Dec 13, 2023
3b965a8
remove duplicate export
mdekstrand Dec 13, 2023
5604b5b
remove obsolete exports
mdekstrand Dec 13, 2023
a459316
make a test use the two-arg random pathway
mdekstrand Dec 13, 2023
681907b
clean up unused imports
mdekstrand Dec 13, 2023
572cdbd
fix plackett-luce rank test
mdekstrand Dec 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 12 additions & 36 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,35 @@
import logging
from pytest import fixture

import numpy as np
from seedbank import numpy_rng, initialize

from lenskit import util
logging.getLogger("numba").setLevel(logging.INFO)

logging.getLogger('numba').setLevel(logging.INFO)

_log = logging.getLogger('lenskit.tests')
_log = logging.getLogger("lenskit.tests")


@fixture
def rng():
return util.rng(42)


@fixture
def legacy_rng():
return util.rng(42, legacy_rng=True)
return numpy_rng(42)


@fixture(autouse=True)
def init_rng(request):
util.init_rng(42)
initialize(42)


@fixture(autouse=True)
def log_test(request):
modname = request.module.__name__ if request.module else '<unknown>'
funcname = request.function.__name__ if request.function else '<unknown>'
_log.info('running test %s:%s', modname, funcname)


@fixture(autouse=True, scope='session')
def carbon(request):
try:
from codecarbon import EmissionsTracker
except ImportError:
yield True # we do nothing
return

tracker = EmissionsTracker("lkpy-tests", 5)
tracker.start()
try:
yield True
finally:
emissions = tracker.stop()
_log.info('test suite used %.3f kgCO2eq', emissions)
modname = request.module.__name__ if request.module else "<unknown>"
funcname = request.function.__name__ if request.function else "<unknown>"
_log.info("running test %s:%s", modname, funcname)


def pytest_collection_modifyitems(items):
# add 'slow' to all 'eval' tests
for item in items:
evm = item.get_closest_marker('eval')
slm = item.get_closest_marker('slow')
evm = item.get_closest_marker("eval")
slm = item.get_closest_marker("slow")
if evm is not None and slm is None:
_log.debug('adding slow mark to %s', item)
item.add_marker('slow')
_log.debug("adding slow mark to %s", item)
item.add_marker("slow")
8 changes: 4 additions & 4 deletions lenskit/algorithms/als.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging
from collections import namedtuple
import warnings

import numpy as np
from numba import njit, prange

from csr import CSR
from seedbank import numpy_rng

from .bias import Bias
from .mf_common import MFPredictor
Expand Down Expand Up @@ -337,7 +337,7 @@ class BiasedMF(MFPredictor):
damping ``damping``.
method(str): the solver to use (see above).
rng_spec:
Random number generator or state (see :func:`lenskit.util.random.rng`).
Random number generator or state (see :func:`seedbank.numpy_rng`).
progress: a :func:`tqdm.tqdm`-compatible progress bar function
"""

Expand Down Expand Up @@ -366,7 +366,7 @@ def __init__(
else:
self.bias = bias
self.progress = progress if progress is not None else util.no_progress
self.rng = util.rng(rng_spec)
self.rng = numpy_rng(rng_spec)
self.save_user_features = save_user_features

def fit(self, ratings, **kwargs):
Expand Down Expand Up @@ -600,7 +600,7 @@ def __init__(
self.weight = weight
self.use_ratings = use_ratings
self.method = method
self.rng = util.rng(rng_spec)
self.rng = numpy_rng(rng_spec)
self.progress = progress if progress is not None else util.no_progress
self.save_user_features = save_user_features

Expand Down
2 changes: 1 addition & 1 deletion lenskit/algorithms/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def __init__(self, selector=None, rng_spec=None):
else:
self.selector = UnratedItemCandidateSelector()
# Get a Pandas-compatible RNG
self.rng_source = derivable_rng(rng_spec, legacy=True)
self.rng_source = derivable_rng(rng_spec)
self.items = None

def fit(self, ratings, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions lenskit/algorithms/funksvd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import numpy as np
import numba as n
from pandas.core.series import Series
from seedbank import numpy_rng

try:
from numba.experimental import jitclass
Expand Down Expand Up @@ -242,7 +242,7 @@ def __init__(
self.bias = Bias(damping=damping)
else:
self.bias = bias
self.random = util.rng(random_state)
self.random = numpy_rng(random_state)

def fit(self, ratings, **kwargs):
"""
Expand Down
27 changes: 13 additions & 14 deletions lenskit/crossfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np
import pandas as pd
from . import util
from seedbank import numpy_rng

TTPair = namedtuple("TTPair", ["train", "test"])
TTPair.__doc__ = "Train-test pair (named tuple)."
Expand All @@ -29,7 +29,7 @@ def partition_rows(data, partitions, *, rng_spec=None):
partitions(int):
The number of partitions to produce.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).

Returns:
iterator: an iterator of train-test pairs
Expand All @@ -41,7 +41,7 @@ def partition_rows(data, partitions, *, rng_spec=None):
# create an array of indexes
rows = np.arange(len(data))
# shuffle the indices & split into partitions
rng = util.rng(rng_spec)
rng = numpy_rng(rng_spec)
rng.shuffle(rows)
test_sets = np.array_split(rows, partitions)

Expand Down Expand Up @@ -90,15 +90,16 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None):
disjoint(bool):
If ``True``, force samples to be disjoint.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).

Returns:
iterator: An iterator of train-test pairs.
"""

confirm_unique_index(data)
rng = numpy_rng(rng_spec)
if partitions is None:
test = data.sample(n=size)
test = data.sample(n=size, random_state=rng)
tr_mask = pd.Series(True, index=data.index)
tr_mask.loc[test.index] = False
train = data[tr_mask]
Expand All @@ -111,13 +112,11 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None):
size,
len(data),
)
return partition_rows(data, partitions)
return partition_rows(data, partitions, rng_spec=rng)

# create an array of indexes
rows = np.arange(len(data))

rng = util.rng(rng_spec)

if disjoint:
_logger.info("creating %d disjoint samples of size %d", partitions, size)
ips = _disjoint_sample(rows, partitions, size, rng)
Expand Down Expand Up @@ -181,7 +180,7 @@ class SampleN(PartitionMethod):

def __init__(self, n, rng_spec=None):
self.n = n
self.rng = util.rng(rng_spec, legacy=True)
self.rng = numpy_rng(rng_spec)

def __call__(self, udf):
return udf.sample(n=self.n, random_state=self.rng)
Expand All @@ -197,7 +196,7 @@ class SampleFrac(PartitionMethod):

def __init__(self, frac, rng_spec=None):
self.fraction = frac
self.rng = util.rng(rng_spec, legacy=True)
self.rng = numpy_rng(rng_spec)

def __call__(self, udf):
return udf.sample(frac=self.fraction, random_state=self.rng)
Expand Down Expand Up @@ -247,7 +246,7 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec=
data(pandas.DataFrame): a data frame containing ratings or other data you wish to partition.
partitions(int): the number of partitions to produce
method(PartitionMethod): The method for selecting test rows for each user.
rng_spec: The random number generator or seed (see :py:func:`lenskit.util.rng`).
rng_spec: The random number generator or seed (see :py:func:`seedbank.numpy_rng`).

Returns
iterator: an iterator of train-test pairs
Expand All @@ -263,7 +262,7 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec=
# create an array of indexes into user row
rows = np.arange(len(users))
# shuffle the indices & split into partitions
rng = util.rng(rng_spec, legacy=True)
rng = numpy_rng(rng_spec)
rng.shuffle(rows)
test_sets = np.array_split(rows, partitions)

Expand Down Expand Up @@ -303,14 +302,14 @@ def sample_users(
method(PartitionMethod):
The method for obtaining user test ratings.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).

Returns:
iterator: An iterator of train-test pairs (as :class:`TTPair` objects).
"""

confirm_unique_index(data)
rng = util.rng(rng_spec, legacy=True)
rng = numpy_rng(rng_spec)

user_col = data["user"]
users = user_col.unique()
Expand Down
4 changes: 1 addition & 3 deletions lenskit/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..algorithms import Algorithm
from .log import log_to_notebook, log_to_stderr # noqa: F401
from .timing import Stopwatch # noqa: F401
from .random import rng, init_rng, derivable_rng # noqa: F401
from .random import derivable_rng
from .parallel import proc_count # noqa: F401

try:
Expand All @@ -24,8 +24,6 @@
"log_to_notebook",
"Stopwatch",
"read_df_detect",
"rng",
"init_rng",
"derivable_rng",
"proc_count",
"clone",
Expand Down
14 changes: 9 additions & 5 deletions lenskit/util/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
import pickle
from threadpoolctl import threadpool_limits

import seedbank

from lenskit.sharing import persist, PersistedModel
from lenskit.util.log import log_queue
from lenskit.util.random import derive_seed, init_rng, get_root_seed

_log = logging.getLogger(__name__)
__work_model = None
Expand Down Expand Up @@ -91,7 +92,7 @@ def _initialize_worker(log_queue, seed):
__is_worker = True
faulthandler.enable()
if seed is not None:
init_rng(seed)
seedbank.initialize(seed)
if log_queue is not None:
h = logging.handlers.QueueHandler(log_queue)
root = logging.getLogger()
Expand All @@ -101,7 +102,7 @@ def _initialize_worker(log_queue, seed):


def _initialize_mp_worker(mkey, func, threads, log_queue, seed):
seed = derive_seed(mp.current_process().name, base=seed)
seed = seedbank.derive_seed(mp.current_process().name, base=seed)
_initialize_worker(log_queue, seed)
global __work_model, __work_func

Expand Down Expand Up @@ -183,7 +184,7 @@ def run_sp(func, *args, **kwargs):
"""
ctx = LKContext.INSTANCE
rq = ctx.SimpleQueue()
seed = derive_seed()
seed = seedbank.derive_seed()
worker_args = (log_queue(), seed, rq, func, args, kwargs)
_log.debug("spawning subprocess to run %s", func)
proc = ctx.Process(target=_sp_worker, args=worker_args)
Expand Down Expand Up @@ -300,7 +301,10 @@ def __init__(self, model, func, n_jobs, persist_method):
os.environ["_LK_IN_MP"] = "yes"
kid_tc = proc_count(level=1)
self.executor = ProcessPoolExecutor(
n_jobs, ctx, _initialize_mp_worker, (key, func, kid_tc, log_queue(), get_root_seed())
n_jobs,
ctx,
_initialize_mp_worker,
(key, func, kid_tc, log_queue(), seedbank.root_seed()),
)

def map(self, *iterables):
Expand Down
Loading
Loading