import numpy as np
import os
import multiprocessing

from math import ceil

def worker(row_submatrix, x, start, end, y):
    """Multiply a block of rows by ``x`` and store the result in ``y[start:end]``.

    Args:
        row_submatrix: The block of matrix rows to multiply.
        x: The right-hand operand of the product.
        start: First index (inclusive) of the destination slice in ``y``.
        end: Last index (exclusive) of the destination slice in ``y``.
        y: Output buffer supporting slice assignment; modified in place.
    """
    partial_product = row_submatrix @ x
    y[start:end] = partial_product

def partition_rows(nrows, nproc):
    """Split ``range(nrows)`` into at most ``nproc`` contiguous, non-empty chunks.

    Args:
        nrows: Total number of rows to distribute.
        nproc: Maximum number of chunks (one per process).

    Returns:
        A list of half-open ``(start, end)`` index pairs that together cover
        ``0..nrows`` exactly once. Every chunk has ``ceil(nrows / nproc)``
        rows except possibly the last one. The list is empty when ``nrows``
        or ``nproc`` is not positive.
    """
    if nrows <= 0 or nproc <= 0:
        return []
    bsize = ceil(nrows / nproc)
    # Stepping by bsize stops as soon as the rows run out, so no chunk is
    # ever empty even when nproc * bsize overshoots nrows.
    return [(start, min(start + bsize, nrows)) for start in range(0, nrows, bsize)]


if __name__ == "__main__":
    A = np.random.randn(1000, 500)
    x = np.random.randn(500)
    # Use one process per available CPU core. os.cpu_count() returns None
    # when the count cannot be determined, so fall back to a single process.
    nproc = os.cpu_count() or 1
    # number of rows of A
    nrows = np.shape(A)[0]
    # Shared array of doubles that the workers fill with their slice of A @ x.
    y = multiprocessing.Array('d', nrows)
    processes = []
    # Launch one process per non-empty block of rows.
    for start, end in partition_rows(nrows, nproc):
        p = multiprocessing.Process(target=worker, args=(A[start:end], x, start, end, y))
        p.start()
        processes.append(p)

    # Wait for every worker to finish before reading the shared result.
    for p in processes:
        p.join()

    print("Are the results the same? ", np.allclose(np.array(y), A @ x))
