I frequently run into situations where I loop over an index and, on each iteration, slice an array up to that index. Here’s an example of the kind of calculation I’m doing.
import numpy as np
def get_betas(x, y):
    """Return the ordinary-least-squares coefficients for regressing y on x.

    Solves the normal equations ``(x.T @ x) @ beta = x.T @ y`` for ``beta``.

    Parameters
    ----------
    x : ndarray
        Design matrix; expected to be 2-D, one row per observation and one
        column per regressor.
    y : ndarray
        Response values, one entry (or row) per observation.

    Returns
    -------
    ndarray
        The coefficient vector (or matrix, when ``y`` is 2-D).

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``x.T @ x`` is singular (e.g. fewer observations than regressors).
    """
    # Calculate parts of OLS formula
    xy = x.T @ y
    xx = x.T @ x
    # Solve the linear system directly instead of forming the explicit
    # inverse and multiplying: it is cheaper and numerically more accurate.
    return np.linalg.solve(xx, xy)
def calc_oos_error(x, y, k):
    """Return the mean and standard deviation of expanding-window forecast errors.

    For every index ``t`` from ``k`` up to ``len(y) - 1``, coefficients are
    fitted with ``get_betas`` on the observations before ``t`` (``x[:t]``,
    ``y[:t]``) and used to predict ``y[t]`` from ``x[t]``. The error is the
    actual value minus the prediction.

    Parameters
    ----------
    x : ndarray
        Regressors, indexed by observation along the first axis.
    y : ndarray
        Responses, indexed by observation along the first axis.
    k : int
        Index of the first observation to predict, i.e. the size of the
        initial fitting window.

    Returns
    -------
    tuple
        ``(mean_error, std_error)`` over all predicted observations.
    """
    residuals = [
        y[t] - x[t] @ get_betas(x[:t], y[:t])
        for t in range(k, len(y))
    ]
    return np.mean(residuals), np.std(residuals)
# Simulate 1000 observations of y = 10 * x plus small Gaussian noise.
x = np.random.normal(size=1000)
y = 10 * x + np.random.normal(scale=1e-2, size=x.size)
# Turn x into a single-column 2-D array so it can be used as a design matrix.
x = x[:, np.newaxis]
# Mean and standard deviation of the out-of-sample errors, starting at index 10.
a, b = calc_oos_error(x, y, 10)
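I would like to vectorize away the loop in calc_oos_error. However, when I pass the arrays x and y as arguments to a function wrapped with np.vectorize, an error is thrown: np.vectorize broadcasts over every array argument, so the wrapped function receives individual elements rather than the whole arrays and the slicing fails.
One option would be to have the function refer to x and y as global variables.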
@np.vectorize
def calc(t):
    """Return the forecast error for observation ``t``.

    Fits coefficients with ``get_betas`` on the observations before ``t`` and
    returns the actual ``y[t]`` minus the prediction from ``x[t]``.

    NOTE: ``x`` and ``y`` are read from module-level globals, not passed in.
    The function is wrapped with ``np.vectorize`` so it can be called with an
    array of indices and is evaluated once per index.
    """
    train_x, train_y = x[:t], y[:t]
    coef = get_betas(train_x, train_y)
    prediction = x[t] @ coef
    return y[t] - prediction
def calc_oos_error_vec(x, y, k):
    """Return the mean and standard deviation of expanding-window forecast errors.

    Equivalent to the explicit loop version, but the per-index calculation is
    mapped over the index array with ``np.vectorize``.

    Parameters
    ----------
    x : ndarray
        Regressors, indexed by observation along the first axis.
    y : ndarray
        Responses, indexed by observation along the first axis.
    k : int
        Index of the first observation to predict.

    Returns
    -------
    tuple
        ``(mean_error, std_error)`` over indices ``k .. len(y) - 1``.
    """
    # Close over the x and y that were actually passed in. Relying on a
    # module-level function that reads global x and y would silently ignore
    # these arguments.
    def _error_at(t):
        beta = get_betas(x[:t], y[:t])
        return y[t] - x[t] @ beta

    # Only the index is an argument of the vectorized function, so the
    # arrays are never broadcast element by element.
    errors = np.vectorize(_error_at)(np.arange(k, len(y)))
    return np.mean(errors), np.std(errors)
However, this strikes me as confusing and just asking for errors.