
ENH speed up grad computation via Cython #71

Merged 3 commits on Apr 9, 2025
6 changes: 3 additions & 3 deletions fastcan/_cancorr_fast.pyx
@@ -11,7 +11,7 @@ from cython.parallel import prange
from scipy.linalg.cython_blas cimport isamax, idamax
from sklearn.utils._cython_blas cimport ColMajor, NoTrans
from sklearn.utils._cython_blas cimport _dot, _scal, _nrm2, _gemm, _axpy
from sklearn.utils._typedefs cimport int32_t, uint8_t
from sklearn.utils._typedefs cimport uint8_t


@final
@@ -106,7 +106,7 @@ cdef floating _sscvm(
cdef void _mgsvv(
const floating* w, # IN
const floating* x, # IN/OUT
int n_samples, # IN
int n_samples, # IN
) noexcept nogil:
"""
Modified Gram-Schmidt process. x = x - w*w.T*x
@@ -133,7 +133,7 @@ cpdef int _forward_search(
int num_threads, # IN
int verbose, # IN
uint8_t[::1] mask, # IN/TEMP
int32_t[::1] indices, # OUT
int[::1] indices, # OUT
floating[::1] scores, # OUT
) except -1 nogil:
"""
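For reference, the `_mgsvv` kernel touched in the hunk above implements the projection step from its docstring, x = x - w*w.T*x. A rough NumPy equivalent of that single step (illustrative only, not part of this PR; `w` is assumed to be a unit-norm 1-D vector of the same length as `x`):

import numpy as np

def mgs_step(w, x):
    # Remove from x its component along w: x <- x - w * (w.T @ x).
    return x - w * np.dot(w, x)

# After the step, x is orthogonal to w.
w = np.array([1.0, 0.0, 0.0])
x = np.array([2.0, 3.0, 4.0])
x = mgs_step(w, x)
assert abs(np.dot(w, x)) < 1e-12
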
2 changes: 1 addition & 1 deletion fastcan/_fastcan.py
@@ -293,7 +293,7 @@ def _get_support_mask(self):

def _prepare_search(n_features, n_features_to_select, indices_include, indices_exclude):
# initiated with -1
indices = np.full(n_features_to_select, -1, dtype=np.intc, order="F")
indices = np.full(n_features_to_select, -1, dtype=np.int32, order="F")
indices[: indices_include.size] = indices_include
scores = np.zeros(n_features_to_select, dtype=float, order="F")
mask = np.zeros(n_features, dtype=np.ubyte, order="F")
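The dtype chosen here has to stay compatible with the `indices` memoryview that `_forward_search` declares in `_cancorr_fast.pyx` above: the buffer's element size and contiguity must match the declared C integer type, otherwise the call fails with a buffer mismatch. A minimal illustrative check (not part of the PR; `np.intc` is NumPy's alias for C `int`, which is 32-bit on mainstream platforms):

import numpy as np

n_features_to_select = 5
indices = np.full(n_features_to_select, -1, dtype=np.int32, order="F")
# A 1-D contiguous array is both C- and F-contiguous, so it satisfies int[::1].
assert indices.flags["C_CONTIGUOUS"]
# On common platforms C int is 32-bit, so np.intc and np.int32 coincide.
assert np.dtype(np.intc).itemsize == indices.dtype.itemsize
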
113 changes: 113 additions & 0 deletions fastcan/_narx_fast.pyx
@@ -0,0 +1,113 @@
"""
Fast gradient computation for narx
"""
# Authors: The fastcan developers
# SPDX-License-Identifier: MIT

from cython cimport floating, final


@final
cpdef void _update_terms(
const floating[:, ::1] X, # IN
const floating[:, ::1] y_hat, # IN
floating[::1] terms, # OUT
const int[:, ::1] feat_ids, # IN
const int[:, ::1] delay_ids, # IN
const int k, # IN
) noexcept nogil:
"""
Evaluate all terms for the given features and delays at timestep k.
"""
cdef:
int i
int n_coefs = feat_ids.shape[0]

for i in range(n_coefs):
terms[i] = _evaluate_term(
X, y_hat, feat_ids[i], delay_ids[i], k
)


@final
cpdef void _predict_step(
const floating[:, ::1] X, # IN
const floating[:, ::1] y_hat, # IN
floating[::1] y_pred, # OUT
const floating[::1] coef, # IN
const int[:, ::1] feat_ids, # IN
const int[:, ::1] delay_ids, # IN
const int[::1] output_ids, # IN
const int k, # IN
) noexcept nogil:
"""
Evaluate the expression for all outputs at timestep k.
"""
cdef:
int n_terms = feat_ids.shape[0]
int i, output_i

# Add all terms
for i in range(n_terms):
output_i = output_ids[i]
y_pred[output_i] += coef[i] * _evaluate_term(
X, y_hat, feat_ids[i], delay_ids[i], k
)


@final
cdef floating _evaluate_term(
const floating[:, ::1] X, # IN
const floating[:, ::1] y_hat, # IN
const int[::1] feat_ids, # IN
const int[::1] delay_ids, # IN
const int k, # IN
) noexcept nogil:
"""
Evaluate a term based on feature and delay IDs.
"""
cdef:
int n_feats = X.shape[1]
int n_vars = feat_ids.shape[0]
floating term = 1.0
int i, feat_id

for i in range(n_vars):
feat_id = feat_ids[i]
if feat_id != -1:
if feat_id < n_feats:
term *= X[k - delay_ids[i], feat_id]
else:
term *= y_hat[k - delay_ids[i], feat_id - n_feats]

return term


@final
cpdef void _update_cfd(
const floating[:, ::1] X, # IN
const floating[:, ::1] y_hat, # IN
floating[:, :, ::1] cfd, # OUT
const floating[::1] coef, # IN
const int[:, ::1] grad_yyd_ids, # IN
const int[::1] grad_coef_ids, # IN
const int[:, ::1] grad_feat_ids, # IN
const int[:, ::1] grad_delay_ids, # IN
const int k, # IN
) noexcept nogil:
"""
Updates CFD matrix based on the current state.
"""
cdef:
int n_grad_terms = grad_yyd_ids.shape[0]
int i, row_y_id, col_y_id, delay_id_1

for i in range(n_grad_terms):
row_y_id = grad_yyd_ids[i, 0]
col_y_id = grad_yyd_ids[i, 1]
delay_id_1 = grad_yyd_ids[i, 2]

cfd[row_y_id, col_y_id, delay_id_1] += coef[grad_coef_ids[i]] * \
_evaluate_term(
X, y_hat, grad_feat_ids[i], grad_delay_ids[i], k
)
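
To make the encoding concrete: `_evaluate_term` treats each row of `feat_ids`/`delay_ids` as one polynomial term, where an ID below `X.shape[1]` selects a (delayed) input column, a larger ID selects a (delayed) predicted output, and -1 marks an unused slot. Below is a pure-Python mirror of that logic together with a hypothetical encoding of the term u0(k-1) * y0(k-2); the IDs and shapes are illustrative, not taken from fastcan's public API:

import numpy as np

def evaluate_term_py(X, y_hat, feat_ids, delay_ids, k):
    # Product of the selected, delayed inputs and predicted outputs at step k.
    n_feats = X.shape[1]
    term = 1.0
    for feat_id, delay in zip(feat_ids, delay_ids):
        if feat_id == -1:
            continue                                      # padding slot
        if feat_id < n_feats:
            term *= X[k - delay, feat_id]                 # delayed input
        else:
            term *= y_hat[k - delay, feat_id - n_feats]   # delayed predicted output
    return term

X = np.arange(10.0).reshape(10, 1)             # one input signal u0
y_hat = np.full((10, 1), 0.5)                  # one predicted output y0
feat_ids = np.array([0, 1], dtype=np.intc)     # u0, then y0 (output IDs start at n_feats)
delay_ids = np.array([1, 2], dtype=np.intc)    # delays 1 and 2
print(evaluate_term_py(X, y_hat, feat_ids, delay_ids, k=5))   # X[4, 0] * y_hat[3, 0] = 2.0

`_predict_step` then adds `coef[i]` times each such term into `y_pred[output_ids[i]]`, and `_update_cfd` accumulates the same kind of products, weighted by the corresponding coefficients, into the gradient bookkeeping array `cfd`, one timestep at a time inside a `nogil` loop.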
8 changes: 8 additions & 0 deletions fastcan/meson.build
@@ -13,4 +13,12 @@ py.extension_module(
subdir: 'fastcan',
cython_args: cython_args,
install: true
)

py.extension_module(
'_narx_fast',
'_narx_fast.pyx',
subdir: 'fastcan',
cython_args: cython_args,
install: true
)
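
This stanza mirrors the existing `_cancorr_fast` target, so after a rebuild the new kernel should be importable as a submodule. An illustrative smoke test (assuming an installed or editable build):

# Illustrative only: the cpdef functions are exposed at the Python level too.
from fastcan import _narx_fast
print(hasattr(_narx_fast, "_predict_step"))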