
#include "context.h"

#include <cstdlib>
#include <iostream>

#ifdef CREATE_LOG
  // Number of hardware events sampled per task; used as the length of every
  // event-id array and counter-value array in this file.
  #define E_S 2
  // Initializer list for the PAPI event array. Index 0 ends up in
  // job_log::cache_m and index 1 in job_log::cache_a (see base_work).
  // PAPI_L3_TCM / PAPI_L3_TCA are the PAPI presets for L3 total cache misses
  // and L3 total cache accesses.
  #define COUNTERS { PAPI_L3_TCM, PAPI_L3_TCA }
#endif

// Creates the bookkeeping record for one worker.
//   id   - identifier of the worker (also used as the core index in work())
//   node - index of the node scheduler this worker belongs to
//   loc  - scheduler the worker pulls its local tasks from (not owned)
// The per-thread work counter starts at zero.
meta_thread::meta_thread(int id, int node, local_task_scheduler *loc)
    : id(id),
      node(node),
      loc_scheduler(loc),
      work_count(0)
{
}

/**
 * Restricts the calling thread to a single CPU core.
 *
 * @param core  index of the core the calling thread is bound to.
 *
 * Failures are reported on stdout instead of being dropped silently; in that
 * case the affinity mask of the calling thread is left as it was.
 */
void pin_to_core(size_t core)
{
    // A fixed-size cpu_set_t can only describe cores below CPU_SETSIZE.
    if (core >= static_cast<size_t>(CPU_SETSIZE))
    {
        std::cout << "error: cannot pin thread to core " << core
                  << " (core index out of range)" << std::endl;
        return;
    }

    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(core, &cpuset);

    // pthread_setaffinity_np returns 0 on success and an error number otherwise.
    const int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    if (rc != 0)
    {
        std::cout << "error: pinning thread to core " << core
                  << " failed (error code " << rc << ")" << std::endl;
    }
}

/**
 * Fills the position fields (x, y, k, size_y) of a log record from a local task.
 *
 * @param t  local task being logged; its meta task supplies type, x, y, k and
 *           meta_y, and t.id selects the piece within the meta task.
 * @param l  log record whose x, y, k and size_y fields are written.
 *
 * For an unknown meta task type an error line is printed and l is left
 * untouched.
 */
void get_kxy(local_task& t, job_log& l)
{
    const auto& task = *t.meta;
    const auto  kind = task.type;

    if (kind == PANEL)
    {
        // The panel is logged as one entry spanning meta_y in y-direction.
        // (An earlier, disabled variant instead split meta_y across nlocal
        // local tasks and derived y / size_y from t.id.)
        l.x      = task.x;
        l.k      = task.k;
        l.y      = task.y;
        l.size_y = task.meta_y;
    }
    else if (kind == B || kind == SCHUR_COMPLEMENT || kind == RANDOMIZE)
    {
        // t.id enumerates single entries: the quotient by meta_y advances x,
        // the remainder advances y.
        l.x = task.x + t.id / task.meta_y;
        l.y = task.y + t.id % task.meta_y;

        // RANDOMIZE is logged with k = -1, the other two with the meta task's k.
        if (kind == RANDOMIZE)
        {
            l.k = -1;
        }
        else
        {
            l.k = task.k;
        }
        l.size_y = 1;
    }
    else if (kind == BEHIND_PANEL_UPDATE)
    {
        // One entry per t.id in x-direction, starting at y = k and spanning meta_y.
        l.x      = task.x + t.id;
        l.y      = task.k;
        l.k      = task.k;
        l.size_y = task.meta_y;
    }
    else
    {
        std::cout << "error: unknown meta task type (while logging)" << std::endl;
    }
}

/**
 * Task loop executed by every thread (workers via work(), the main thread via
 * meta_context::work_0()).
 *
 * @param thread  record of the executing thread; its loc_scheduler supplies
 *                the tasks and its work_count (and, with CREATE_LOG, its log)
 *                is updated.
 *
 * The loop ends when the context's finalize flag is set. Thread 0 additionally
 * leaves as soon as the scheduler reports all_tasks_finished; every other
 * thread keeps re-checking the flags in that situation.
 */
void base_work(meta_thread& thread)
{
    local_task_scheduler& local_sched = *thread.loc_scheduler;
    local_task current;
    #ifdef CREATE_LOG
    long long papi_values[E_S];
    #endif

    for (;;)
    {
        if (meta_context::getContext().finalize) break;

        if (meta_context::getContext().scheduler.all_tasks_finished)
        {
            // Only thread 0 returns to its caller here; the others loop again.
            if (thread.id == 0) break;
            continue;
        }

        // Nothing to do right now: go back and re-check the flags.
        if (!local_sched.getTask(&current)) continue;

        #ifdef CREATE_LOG
            job_log entry;
            get_kxy(current, entry);
            entry.thread_id = thread.id;
            entry.pri       = current.meta->pri;
            entry.task_id   = current.meta->type;
            entry.start     = std::chrono::high_resolution_clock::now();
            // Values of this first read are overwritten below.
            // NOTE(review): presumably this read resets the counters so the
            // second read yields per-task values - confirm against PAPI docs.
            PAPI_read_counters(papi_values, E_S);
        #endif

        thread.work_count++;
        const bool meta_done = current.work();

        #ifdef CREATE_LOG
            PAPI_read_counters(papi_values, E_S);
            entry.end     = std::chrono::high_resolution_clock::now();
            entry.cache_m = papi_values[0];
            entry.cache_a = papi_values[1];
            thread.log.push_back(entry);
        #endif

        // work() returning true means the meta task has to be reported as finished.
        if (meta_done)
        {
            meta_context::getContext().scheduler.finishMetaTask(current.meta);
        }
    }
}

void* work(void* arg)
{
    meta_thread& thread = *((meta_thread*)arg) ;
    pin_to_core(thread.id);

    #ifdef CREATE_LOG
        PAPI_register_thread();
        int events[E_S] = COUNTERS;
        long long counter[E_S];
        PAPI_start_counters(events, E_S);
    #endif

    base_work(thread);

    #ifdef CREATE_LOG
        PAPI_stop_counters(counter, E_S);
        PAPI_unregister_thread();
    #endif

    return nullptr;
}

// A fresh context is not finalizing; shut_down() sets the flag later.
meta_context::meta_context()
    : finalize(false)
{
}

// alternatively read nthreads and nnumanodes with numa_num_task_cpus ...
/**
 * Builds the per-node schedulers and starts the worker threads.
 *
 * @param nthread  total number of threads including the calling (main)
 *                 thread; thread i gets id i and is looked up on core i.
 * @param node     number of node schedulers / ready queues to create.
 *
 * The main thread acts as thread 0, so only threads 1..nthread-1 are created
 * here. If a worker cannot be created the process is terminated: continuing
 * would leave a counted but non-existent worker whose unset handle
 * shut_down() would later join.
 *
 * With CREATE_LOG the PAPI library is initialized and the counters of the
 * calling thread are started as well.
 */
void meta_context::initialize(int nthread, int node)
{
    #ifdef CREATE_LOG
        pin_to_core(0);
        int retval = PAPI_library_init(PAPI_VER_CURRENT);
        if (retval != PAPI_VER_CURRENT)
        {
            std::cout << "error: PAPI initialization error!" << std::endl;
            std::exit(retval);
        }
        retval = PAPI_thread_init(pthread_self);
        if (retval != PAPI_OK)
        {
            std::cout << "error: PAPI thread init failed" << std::endl;
        }

        // initialize counters on thread 0
        //PAPI_register_thread();
        int events[E_S] = COUNTERS;
        retval = PAPI_start_counters(events, E_S);
        if (retval != PAPI_OK)
        {
            std::cout << "error: PAPI start counters failed" << std::endl;
        }
    #endif

    // One local scheduler and one ready queue per node.
    for (int i = 0; i < node; i++)
    {
        node_scheduler.push_back(local_task_scheduler(i, &scheduler));
        scheduler.ready_queues.push_back(priority_queue());
    }

    // All node schedulers exist now, so the addresses handed to the thread
    // records are taken after the last push_back above.
    for (int i = 0; i < nthread; ++i)
    {
        int n = numa_node_of_cpu(i);
        threads.push_back(meta_thread(i,n,&(node_scheduler.at(n))));
    }


    // the main thread will act as thread 0 so no pthread_create necessary
    for (int i = 1; i < nthread; ++i)
    {
        meta_thread& t = threads.at(i);
        node_scheduler.at(t.node).size++;

        const int rc = pthread_create(&(t.thread), nullptr, work, static_cast<void*>(&t));
        if (rc != 0)
        {
            std::cout << "error: could not create worker thread " << i
                      << " (pthread_create returned " << rc << ")" << std::endl;
            std::exit(rc);
        }
    }
}

/**
 * Builds the per-node schedulers and starts one worker per listed core.
 *
 * @param cores  core indices to use; the first entry belongs to the calling
 *               (main) thread and is expected to be 0. Each entry becomes the
 *               id of the corresponding thread record.
 * @param node   number of node schedulers / ready queues to create.
 *
 * An empty list or a first entry other than 0 is reported on stdout; set-up
 * continues afterwards. The main thread acts as thread 0, so only the
 * remaining entries get a pthread. If a worker cannot be created the process
 * is terminated: continuing would leave a counted but non-existent worker
 * whose unset handle shut_down() would later join.
 *
 * With CREATE_LOG the PAPI library is initialized and the counters of the
 * calling thread are started as well.
 */
void meta_context::initialize(std::vector<int> cores, int node)
{
    // Guard the cores[0] access: indexing an empty vector is undefined behaviour.
    if (cores.empty())
    {
        std::cout << "error: no cores given, at least core 0 is required" << std::endl;
    }
    else if (cores[0] != 0)
    {
        std::cout << "error: the first thread must be on core 0" << std::endl;
    }

    #ifdef CREATE_LOG
        pin_to_core(0);
        int retval = PAPI_library_init(PAPI_VER_CURRENT);
        if (retval != PAPI_VER_CURRENT)
        {
            std::cout << "error: PAPI initialization error!" << std::endl;
            std::exit(retval);
        }
        retval = PAPI_thread_init(pthread_self);
        if (retval != PAPI_OK)
        {
            std::cout << "error: PAPI thread init failed" << std::endl;
        }

        // initialize counters on thread 0
        //PAPI_register_thread();
        int events[E_S] = COUNTERS;
        retval = PAPI_start_counters(events, E_S);
        if (retval != PAPI_OK)
        {
            std::cout << "error: PAPI start counters failed" << std::endl;
        }
    #endif

    // One local scheduler and one ready queue per node.
    for (int i = 0; i < node; i++)
    {
        node_scheduler.push_back(local_task_scheduler(i, &scheduler));
        scheduler.ready_queues.push_back(priority_queue());
    }

    // All node schedulers exist now, so the addresses handed to the thread
    // records are taken after the last push_back above.
    for (int i : cores)
    {
        int n = numa_node_of_cpu(i);
        threads.push_back(meta_thread(i,n,&(node_scheduler.at(n))));
    }

    // the main thread will act as thread 0 so no pthread_create necessary
    for (std::size_t i = 1; i < threads.size(); ++i)
    {
        meta_thread& t = threads.at(i);
        node_scheduler.at(t.node).size++;

        const int rc = pthread_create(&(t.thread), nullptr, work, static_cast<void*>(&t));
        if (rc != 0)
        {
            std::cout << "error: could not create worker thread " << i
                      << " (pthread_create returned " << rc << ")" << std::endl;
            std::exit(rc);
        }
    }
}

// used by the main thread to work
void meta_context::work_0()
{

    node_scheduler.at(0).size++;
    base_work(threads.at(0));
    node_scheduler.at(0).size--;
}

/**
 * Resets the per-run state so the context can be reused.
 *
 * Clears every thread's work counter and log, empties the scheduler's task
 * hash (reporting on stdout if it still held entries) and sets newest_task
 * back to -1.
 */
void meta_context::cleanup()
{
    for (auto& worker : threads)
    {
        worker.log.clear();
        worker.work_count = 0;
    }

    // Entries remaining at this point are reported, then dropped regardless.
    const bool leftovers = !scheduler.task_hash.empty();
    if (leftovers)
    {
        std::cout << "error: still tasks in hash" << std::endl;
    }
    scheduler.task_hash.clear();

    scheduler.newest_task = -1;
}

/**
 * Stops the worker threads and waits for them.
 *
 * Sets the finalize flag that ends the loop in base_work() and joins every
 * thread except thread 0. With CREATE_LOG the PAPI counters of the calling
 * thread are stopped first.
 */
void meta_context::shut_down()
{
    #ifdef CREATE_LOG
        // The values read while stopping are not used any further.
        long long final_values[E_S];
        PAPI_stop_counters(final_values, E_S);
        PAPI_unregister_thread();
    #endif

    finalize = true;

    // Thread 0 is the main thread and never got a pthread handle.
    std::size_t idx = 1;
    while (idx < threads.size())
    {
        pthread_join(threads.at(idx).thread, nullptr);
        ++idx;
    }
}
