Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Numpy2 draft #3563

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/build-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,16 @@ jobs:
fail-fast: false
matrix:
include:
- {python: '3.8', os: macos-12}
- {python: '3.9', os: macos-12}
- {python: '3.10', os: macos-12}
- {python: '3.11', os: macos-12}
- {python: '3.12', os: macos-12}

- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python: '3.12', os: ubuntu-20.04}

- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}

- {python: '3.10', os: windows-2019}
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,11 @@ jobs:
fail-fast: false
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python: '3.12', os: ubuntu-20.04}

- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}
- {python: '3.10', os: windows-2019}
- {python: '3.11', os: windows-2019}
Expand Down
10 changes: 5 additions & 5 deletions gensim/_matutils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def mean_absolute_difference(a, b):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) noexcept nogil:
"""Mean absolute difference between two arrays.

Parameters
Expand Down Expand Up @@ -103,7 +103,7 @@ def logsumexp(x):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) noexcept nogil:
"""Log of sum of exponentials.

Parameters
Expand Down Expand Up @@ -223,7 +223,7 @@ def dirichlet_expectation_1d(alpha):

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) noexcept nogil:
"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.

Parameters
Expand Down Expand Up @@ -251,7 +251,7 @@ cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) noexcept nogil:
"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.

Parameters
Expand Down Expand Up @@ -298,7 +298,7 @@ def digamma(DTYPE_t x):


@cython.cdivision(True)
cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
cdef inline DTYPE_t _digamma(DTYPE_t x,) noexcept nogil:
"""Digamma function for positive floats.

Parameters
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/doc2vec_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ cdef void prepare_c_structures_for_batch(
np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
np.uint32_t *reduced_windows, int *document_len, int train_words,
int docvecs_count, int doc_tag, int shrink_windows,
) nogil:
) noexcept nogil:
cdef VocabItem predict_word
cdef string token
cdef int i = 0
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/doc2vec_inner.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -57,39 +57,39 @@ cdef void fast_document_dbow_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
REAL_t *context_vectors, REAL_t *syn1, const int size,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil


cdef unsigned long long fast_document_dbow_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len,
REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
const np.uint32_t contexts_lockf_len) nogil
const np.uint32_t contexts_lockf_len) noexcept nogil


cdef void fast_document_dm_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil
const int size, int learn_hidden) noexcept nogil


cdef unsigned long long fast_document_dm_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil
const int size, int learn_hidden) noexcept nogil


cdef void fast_document_dmc_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


cdef unsigned long long fast_document_dmc_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, learn_hidden, train_words=*, work=*,
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/doc2vec_inner.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ cdef void fast_document_dbow_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
REAL_t *context_vectors, REAL_t *syn1, const int size,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil:
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil:

cdef long long a, b
cdef long long row1 = context_index * size, row2
Expand All @@ -66,7 +66,7 @@ cdef unsigned long long fast_document_dbow_neg(
REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
const np.uint32_t contexts_lockf_len) nogil:
const np.uint32_t contexts_lockf_len) noexcept nogil:

cdef long long a
cdef long long row1 = context_index * size, row2
Expand Down Expand Up @@ -106,7 +106,7 @@ cdef unsigned long long fast_document_dbow_neg(
cdef void fast_document_dm_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil:
const int size, int learn_hidden) noexcept nogil:

cdef long long b
cdef long long row2
Expand All @@ -129,7 +129,7 @@ cdef void fast_document_dm_hs(
cdef unsigned long long fast_document_dm_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil:
const int size, int learn_hidden) noexcept nogil:

cdef long long row2
cdef unsigned long long modulo = 281474976710655ULL
Expand Down Expand Up @@ -165,7 +165,7 @@ cdef unsigned long long fast_document_dm_neg(
cdef void fast_document_dmc_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil:
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

cdef long long a, b
cdef long long row2
Expand All @@ -189,7 +189,7 @@ cdef void fast_document_dmc_hs(
cdef unsigned long long fast_document_dmc_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil:
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

cdef long long a
cdef long long row2
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/fasttext_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ cdef void prepare_c_structures_for_batch(
int *effective_words, int *effective_sentences, unsigned long long *next_random, cvocab_t *vocab,
int *sentence_idx, np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
np.uint32_t *reduced_windows, int *subwords_idx_len, np.uint32_t **subwords_idx, int shrink_windows,
) nogil:
) noexcept nogil:
cdef VocabItem word
cdef string token
cdef vector[string] sent
Expand Down
10 changes: 5 additions & 5 deletions gensim/models/fasttext_inner.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1)
cdef object populate_ft_config(FastTextConfig *c, vocab, buckets_word, sentences)


cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil
cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil


cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil
cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil


cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil
cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil


cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil
cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil


cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil
cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil
10 changes: 5 additions & 5 deletions gensim/models/fasttext_inner.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ cdef int ONE = 1
cdef REAL_t ONEF = <REAL_t>1.0


cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil:
"""Perform skipgram training with negative sampling.

Parameters
Expand Down Expand Up @@ -145,7 +145,7 @@ cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
c.work, &ONE, &c.syn0_ngrams[subwords_index[d]*c.size], &ONE)


cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil:
"""Perform skipgram training with hierarchical sampling.

Parameters
Expand Down Expand Up @@ -221,7 +221,7 @@ cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
&c.syn0_ngrams[row2], &ONE)


cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil:
cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil:
"""Perform CBOW training with negative sampling.

Parameters
Expand Down Expand Up @@ -306,7 +306,7 @@ cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k
&c.syn0_ngrams[c.subwords_idx[m][d]*c.size], &ONE)


cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil:
cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil:
"""Perform CBOW training with hierarchical sampling.

Parameters
Expand Down Expand Up @@ -510,7 +510,7 @@ cdef object populate_ft_config(FastTextConfig *c, wv, buckets_word, sentences):
return effective_words, effective_sentences


cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil:
cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil:
"""Performs training on a fully initialized and populated configuration.

Parameters
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ def save_word2vec_format(
if binary:
fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
else:
fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
fout.write(f"{prefix}{key} {' '.join(val.astype('str') for val in key_vector)}\n".encode('utf8'))

@classmethod
def load_word2vec_format(
Expand Down
4 changes: 2 additions & 2 deletions gensim/models/nmf_pgd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from libc.math cimport sqrt
from cython.parallel import prange

cdef double fmin(double x, double y) nogil:
cdef double fmin(double x, double y) noexcept nogil:
return x if x < y else y

cdef double fmax(double x, double y) nogil:
cdef double fmax(double x, double y) noexcept nogil:
return x if x > y else y

def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):
Expand Down
24 changes: 12 additions & 12 deletions gensim/models/word2vec_corpusfile.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ cdef extern from "fast_line_sentence.h":
cdef cppclass FastLineSentence:
FastLineSentence() except +
FastLineSentence(string&, size_t) except +
vector[string] ReadSentence() nogil except +
bool_t IsEof() nogil
void Reset() nogil
vector[string] ReadSentence() except + nogil
bool_t IsEof() noexcept nogil
void Reset() noexcept nogil


cdef class CythonLineSentence:
Expand All @@ -37,12 +37,12 @@ cdef class CythonLineSentence:
cdef public size_t max_sentence_length, max_words_in_batch, offset
cdef vector[vector[string]] buf_data

cpdef bool_t is_eof(self) nogil
cpdef vector[string] read_sentence(self) nogil except *
cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil
cpdef void reset(self) nogil
cpdef vector[vector[string]] next_batch(self) nogil except *
cpdef bool_t is_eof(self) noexcept nogil
cpdef vector[string] read_sentence(self) except * nogil
cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil
cpdef void reset(self) noexcept nogil
cpdef vector[vector[string]] next_batch(self) except * nogil


cdef struct VocabItem:
Expand All @@ -62,9 +62,9 @@ ctypedef unordered_map[string, VocabItem] cvocab_t
cdef class CythonVocab:
cdef cvocab_t vocab
cdef subword_arrays
cdef cvocab_t* get_vocab_ptr(self) nogil except *
cdef cvocab_t* get_vocab_ptr(self) except * nogil


cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil
cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil
cdef REAL_t get_next_alpha(REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil
18 changes: 9 additions & 9 deletions gensim/models/word2vec_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ cdef class CythonVocab:

self.vocab[token] = word

cdef cvocab_t* get_vocab_ptr(self) nogil except *:
cdef cvocab_t* get_vocab_ptr(self) except * nogil:
return &self.vocab


Expand Down Expand Up @@ -92,17 +92,17 @@ cdef class CythonLineSentence:
if self._thisptr != NULL:
del self._thisptr

cpdef bool_t is_eof(self) nogil:
cpdef bool_t is_eof(self) noexcept nogil:
return self._thisptr.IsEof()

cpdef vector[string] read_sentence(self) nogil except *:
cpdef vector[string] read_sentence(self) except * nogil:
return self._thisptr.ReadSentence()

cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *:
cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil:
cdef vector[string] sent = self.read_sentence()
return self._chunk_sentence(sent)

cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil:
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil:
cdef vector[vector[string]] res
cdef vector[string] chunk
cdef size_t cur_idx = 0
Expand All @@ -120,7 +120,7 @@ cdef class CythonLineSentence:

return res

cpdef void reset(self) nogil:
cpdef void reset(self) noexcept nogil:
self._thisptr.Reset()

def __iter__(self):
Expand All @@ -135,7 +135,7 @@ cdef class CythonLineSentence:
# This function helps pickle to correctly serialize objects of this class.
return rebuild_cython_line_sentence, (self.source, self.max_sentence_length)

cpdef vector[vector[string]] next_batch(self) nogil except *:
cpdef vector[vector[string]] next_batch(self) except * nogil:
cdef:
vector[vector[string]] job_batch
vector[vector[string]] chunked_sentence
Expand Down Expand Up @@ -235,13 +235,13 @@ cdef void prepare_c_structures_for_batch(
reduced_windows[i] = 0


cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil:
cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil:
return alpha - ((alpha - end_alpha) * (<REAL_t> cur_epoch) / num_epochs)


cdef REAL_t get_next_alpha(
REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil:
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil:
cdef REAL_t epoch_progress

if expected_examples != -1:
Expand Down
Loading
Loading