import numpy as np

from heapq import merge
from math import ceil
from multiprocessing import Process, Queue
from os import cpu_count

def mergesort_aux(A, start, end):
    """Return the elements of ``A[start:end]`` as a new ascending list.

    Recursive top-down merge sort: the index range is halved until it
    holds at most one element, then the sorted halves are combined with
    ``heapq.merge``.

    Args:
        A: Sliceable sequence of mutually comparable items; not modified.
        start: Index of the first element of the range.
        end: Index one past the last element of the range.

    Returns:
        A new list holding the sorted contents of the range.
    """
    span = abs(start - end)
    if span > 1:
        # Recursive case: sort each half independently, then merge them.
        middle = (start + end) // 2
        left_sorted = mergesort_aux(A, start, middle)
        right_sorted = mergesort_aux(A, middle, end)
        return [*merge(left_sorted, right_sorted)]
    # Base case: a range of zero or one element is already sorted.
    return list(A[start:end])


def mergesort(A, start, end, queue):
    """Sort ``A[start:end]`` and publish the sorted list on ``queue``.

    Used as the ``Process`` target: the result is handed back through
    ``queue.put`` rather than a return value.
    """
    sorted_block = mergesort_aux(A, start, end)
    queue.put(sorted_block)


def parallel_mergesort(A, nproc):
    """Sort ``A`` by merge-sorting contiguous blocks in separate processes.

    ``A`` is split into at most ``nproc`` contiguous, non-empty blocks.
    Each block is sorted by a worker process running ``mergesort``, and
    the sorted blocks are combined with ``heapq.merge``.

    Args:
        A: Sized, sliceable sequence of mutually comparable items.
        nproc: Maximum number of worker processes to start (must be >= 1).

    Returns:
        A new list with the elements of ``A`` in ascending order.

    Raises:
        ValueError: If ``nproc`` is smaller than 1.
    """
    if nproc < 1:
        # A zero count used to fail with ZeroDivisionError and a negative
        # one silently produced an empty result; reject both explicitly.
        raise ValueError(f"nproc must be a positive integer, got {nproc!r}")

    nrows = len(A)
    if nrows == 0:
        # Nothing to sort: skip creating the queue and any processes.
        return []

    # Block size; never plan for more workers than there are elements.
    bsize = ceil(nrows / min(nproc, nrows))
    # Stepping by the block size yields only non-empty blocks, so no
    # process is started for an empty or reversed index range.
    bounds = [
        (start, min(start + bsize, nrows))
        for start in range(0, nrows, bsize)
    ]

    queue = Queue()
    processes = []
    for start, end in bounds:
        p = Process(target=mergesort, args=(A, start, end, queue))
        p.start()
        processes.append(p)

    # Drain the queue BEFORE joining: a child that has put items on a
    # Queue does not exit until its buffered data has been flushed, so
    # joining first can deadlock. Blocks arrive in completion order,
    # which is fine because the final merge does not depend on order.
    sorted_subarrays = [queue.get() for _ in processes]

    for p in processes:
        p.join()

    # Finally, merge the sorted blocks into one sorted list.
    return list(merge(*sorted_subarrays))


if __name__ == "__main__":
    # Sanity check: sort random data with the builtin `sorted` and with
    # the multiprocess merge sort, then compare the two results.
    A = np.random.randn(10000)
    A_single_sort = sorted(A)
    # os.cpu_count() returns None when the CPU count cannot be
    # determined; fall back to a single worker in that case.
    A_multip_sort = parallel_mergesort(A, cpu_count() or 1)

    print("Result was the same?", np.allclose(A_single_sort, A_multip_sort))
