/* Copyright 2020 Cumulus Networks, Inc.  All rights reserved. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/file.h>
#include <dirent.h>
#include <sys/syscall.h>
#include <utime.h>
#include <string.h>
#include <libgen.h>
#include <execinfo.h>
#include "poll.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <systemd/sd-daemon.h>
#include <string.h>
#include "log.h"
#include <errno.h>
#include <stdbool.h>
#include <pthread.h>
#include <zmq.h>
#include <sys/signal.h>
#include <sys/time.h>
#include <semaphore.h>
#include "cs_mgr.h"
#include "cs_mgr_intf.h"
#include "cs_mgr_cli.h"

#define MAX_STATE_LEN 64

/* Stream for the pid file; its use is outside the visible part of this file. */
static FILE *pid_fp;

/*
 * operative_mode is the mode currently handed out to modules. startup_operative_mode
 * is what operative_mode is reset to when modules are brought up out of MAINTENANCE
 * (see csmgr_admin_modules).
 */
Mode  startup_operative_mode, operative_mode;
/* State reported to modules in the load-complete and init-complete replies. */
State required_state;

const char *CSMGR_PIDFILE = "/var/run/csmgr.pid";

/* Loop condition of csmgr_comm_thread(); the thread sets it to true on entry. */
bool comm_thread_active = false;
pthread_t comm_thread = 0;
pthread_t monitor_thread = 0;
/* ZeroMQ context/socket pair for the publish (PUB) channel. */
void *pub_context = NULL;
void *pub_socket = NULL;
/* ZeroMQ context/socket pair for the request/reply (REP) channel. */
void *comm_context = NULL;
void *comm_socket = NULL;
int keepalive_interval = DEFAULT_KEEPALIVE_INTERVAL;
int keepalive_seq_no = 0;
/* Set when a deferred network-layer broadcast is pending for the monitor timer. */
bool broadcast_l3_up = false;
/*
 * Per-module records indexed by module id. A slot counts as registered when the
 * module id stored in it equals its index (see csmgr_is_mod_registered).
 */
registered_modules registered_mods[MAX_MODULES];

/* Order in which modules are taken down by csmgr_admin_modules(). */
Module mod_layering_order[] = { FRR, BGP, OSPF, CLAG, STP, SWITCHD, SDK };
Module mod_l3_list[] = { FRR, BGP, OSPF };   /* network layer modules from the registered list. */

/* Non-zero enables the debug log in the keepalive handler. */
int debug_csmgrd = 0;
/* When true, a failed send/receive re-creates the affected ZeroMQ channel. */
bool zmq_socket_restart_on_error = true;

/*
 * csmgr_is_mod_l3
 *
 * Reports whether mod appears in mod_l3_list[], i.e. whether it is one of the
 * network-layer protocol modules.
 */

bool
csmgr_is_mod_l3 (Module mod) {
    const Module *cursor = mod_l3_list;
    const Module *end = mod_l3_list + sizeof(mod_l3_list)/sizeof(mod_l3_list[0]);

    while (cursor != end) {
        if (*cursor == mod) {
            return(true);
        }
        cursor++;
    }

    return(false);
}

/*
 * csmgr_num_mod_l3_up
 *
 * Number of L3 modules that are up.
 */

int
csmgr_num_mod_l3_up (void) {
    int mod, total_l3_mods = sizeof(mod_l3_list)/sizeof(mod_l3_list[0]), num_mods_up = 0;

    for (mod = 0; mod < total_l3_mods; mod++) {
        if (registered_mods[mod_l3_list[mod]].mod_status.mode.state == UP) num_mods_up++;
    }
    
    return(num_mods_up);
}

/*
 * csmgr_is_mod_registered
 *
 * A module counts as registered when the module id stored in its slot of
 * registered_mods[] equals the slot index. mod is used as the index without a
 * range check, so callers must pass a value below MAX_MODULES.
 */

bool
csmgr_is_mod_registered (Module mod) {
    const registered_modules *slot = &registered_mods[mod];

    return(slot->mod_status.mode.mod == mod);
}

/*
 * csmgr_get_num_active_modules
 *
 * Returns the number of registered modules. Slots 0 (CS_MGR) and 1 (ALL) are
 * never counted.
 */

int
csmgr_get_num_active_modules (void) {
    const int total_slots = sizeof(registered_mods)/sizeof(registered_mods[0]);
    int active = 0;
    int slot = 2;   /* first real module slot */

    while (slot < total_slots) {
        if (csmgr_is_mod_registered(slot)) {
            active++;
        }
        slot++;
    }

    return(active);
}

int
csmgr_num_l3_mod_present (void) {
    int i;
    int num_l3_mods = 0;

    for (i = 0; i < sizeof(mod_l3_list)/sizeof(mod_l3_list[0]); i++) {
      if (csmgr_is_mod_registered(mod_l3_list[i])) num_l3_mods++;
    }

    return(num_l3_mods);
}

/*
 * csmgr_set_mod_state
 *
 * Records mode and state for one registered module, or for every registered
 * module when module is ALL_MODULES.
 *
 * Returns false (after logging) when module is out of range or names a single
 * module that is not registered; true otherwise.
 */

bool
csmgr_set_mod_state (Module module, Mode mode, State state) {
    int slot;

    if (module >= MAX_MODULES) {
        ERRLOG("Invalid Module 0x%x", module);
        return(false);
    }

    if (module == ALL_MODULES) {
        /* slots 0 and 1 are skipped; only registered slots are updated. */
        for (slot = 2; slot < MAX_MODULES; slot++) {
            if (csmgr_is_mod_registered(slot)) {
                registered_mods[slot].mod_status.mode.mode = mode;
                registered_mods[slot].mod_status.mode.state = state;
            }
        }
        return(true);
    }

    /* a specific module. */
    if (!csmgr_is_mod_registered(module)) {
        ERRLOG("Module 0x%x not registered", module);
        return(false);
    }

    registered_mods[module].mod_status.mode.mode = mode;
    registered_mods[module].mod_status.mode.state = state;

    return(true);
}

/*
 * csmgr_publish_nl_status
 *
 * Builds a single NETWORK_LAYER_INFO message carrying the given module, mode,
 * state and failure reason, and publishes it through csmgr_pub_msg().
 *
 * Returns the result of csmgr_pub_msg().
 */

bool
csmgr_publish_nl_status (Module mod, Mode mode, State state, Failure_Reason err) {
    char raw[MAX_MSG_LEN];
    msg_pkg *pkg = (msg_pkg *)raw;
    msg *entry = (msg *)pkg->entry;
    module_status *payload = (module_status *)entry->data;

    /* header: one entry holding a module_status body. */
    entry->type = NETWORK_LAYER_INFO;
    entry->len = sizeof(msg) + sizeof(module_status);
    pkg->total_len = sizeof(msg_pkg) + entry->len;

    /* body. */
    payload->mode.mod = mod;
    payload->mode.mode = mode;
    payload->mode.state = state;
    payload->failure_reason = err;

    return(csmgr_pub_msg(pkg->total_len, pkg));
}

/*
 * csmgr_broadcast_nl_status
 *
 * Publishes the network-layer status. With no L3 module registered, a single
 * message addressed to ALL_MODULES is published so the others can move ahead
 * (its publish result is returned). Otherwise one message is published per
 * registered L3 module; individual publish results are ignored and true is
 * returned.
 */

bool
csmgr_broadcast_nl_status (void) {
    const int l3_total = sizeof(mod_l3_list)/sizeof(mod_l3_list[0]);
    int idx;

    if (csmgr_num_l3_mod_present() == 0) {
        return(csmgr_publish_nl_status(ALL_MODULES, operative_mode, SUCCESS, NO_ERROR));
    }

    for (idx = 0; idx < l3_total; idx++) {
        if (csmgr_is_mod_registered(mod_l3_list[idx])) {
            (void)csmgr_publish_nl_status(mod_l3_list[idx], operative_mode, SUCCESS, NO_ERROR);
        }
    }

    return(true);
}

/*
 * csmgr_admin_modules
 *
 * Down process:
 *
 * The modules come down in 2 stages in top down order as specified in mod_layering_order[]. For
 * each module, there is a broadcast "GOING_DOWN" and after acknowledgement from all the modules (or timeout),
 * the DOWN event is broadcast. 
 *
 * The module in question will be available to the other modules during the GOING_DOWN processing and 
 * may perform some or all of  its own pending actions or finish its own action during the subsequent 
 * "DOWN" event for itself.
 *
 * Effectively, all the modules will decouple its dependencies from the target module in question. For all 
 * modules option (presently the only one), the down will be processed in the top down order as specified in
 * mod_layering_order[].
 *
 * The ordering is critical in the down processing because a module may need the services of other
 * modules to complete their actions.
 *
 * The down event will not be acknowledged. Normally this will only be processed by the module in
 * question. There should be no pending dependencies at this stage. If modules need to maintain
 * mutual dependencies, they could but it will be outside the scope of this orchestration.
 *
 * For maintenance modes, the modules are expected to stay operative as defined by its own requirements.
 * For some modules (e.g. SDK, switchD), it might be a no-op - i.e. it may be fully operational in
 * maintenance mode.
 *
 * For most modes (except maintenance), culmination of the down (of all modules) will result in restart of 
 * the system/process. The individual upgrade / restart will process the specific module only.
 *
 * Up Process:
 *
 * In the boot process, the ordering is enforced by systemd, so the LOAD/INIT messages are adequate. Coming out of
 * maintenance mode, the same process can be followed by the clients for simplicity (since the modules are
 * expected to be already functional in these cases).The modules must activate their dependencies only when
 * the module up for the dependent module is received. The bottom up ordering (opposite of Down process)
 * should not be required since they have already been active at some point earlier (revisit if required).
 *
 * Note: The API allows a specific module to be targeted. However, in the present incarnation, only the 
 * "ALL_MODULES" option is explicitly supported. This will be enforced by the cli.
 */

/*
 * Bookkeeping shared between csmgr_admin_modules() (the waiter) and
 * csmgr_process_go_down_response() (the responder side).
 */
int num_go_down_response_required = 0;  /* number of modules that need to respond to the GOING_DOWN pub message.*/
int num_go_down_response_recv     = 0;  /* number of modules that have responded to the GOING_DOWN pub message. */
/* Module whose GO_DOWN is currently awaiting responses; responses for any other module are treated as late. */
int mod_id_in_process = -1;

/* Added to time(NULL) to form the absolute deadline of the GO_DOWN wait. */
int module_response_timeout = DEFAULT_MODULE_RESPONSE_TIMEOUT;

/* Mutex/condition pair on which csmgr_admin_modules() waits for the GO_DOWN responses. */
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;

/*
 * csmgr_admin_modules
 *
 * Brings the registered modules up or down for the given reason (mode).
 *
 *   reason     - mode to record and broadcast; also becomes operative_mode.
 *   target_mod - ALL_MODULES or a single module from mod_layering_order[].
 *   up         - true publishes COME_UP; false runs the GO_DOWN / DOWN sequence.
 *
 * Returns true when there is nothing to do or after the down sequence has run;
 * for the up case it returns the result of the last publish.
 *
 * The file-scope mutex/condition pair is statically initialised and shared with
 * the thread that processes the GO_DOWN responses, so it is never re-initialised
 * or destroyed here.
 */

bool
csmgr_admin_modules (Mode reason, Module target_mod, bool up) {
    bool status;
    char buf[MAX_MSG_LEN];
    msg_pkg *out = (msg_pkg *)buf;
    msg *m = (msg *)out->entry;
    module_mode *mod_mode = (module_mode *)m->data;
    struct timespec time_to_wait = {0, 0};
    int ret, ind;
    int num_mods = sizeof(mod_layering_order) / sizeof(mod_layering_order[0]);

    num_go_down_response_required = csmgr_get_num_active_modules();  /* all registered modules. */

    operative_mode = reason;

    if (!num_go_down_response_required) {
        INFOLOG("no registered modules\n");
        return(true);
    }

    m->len = sizeof(msg) + sizeof(module_mode);
    out->total_len = sizeof(msg_pkg) + m->len;

    /* for up broadcast for everyone. There should be no sequencing needed (revisit if required). */

    mod_mode->mode = reason;

    if (up == true) {
        csmgr_set_mod_state(ALL_MODULES, reason, COME_UP);
        m->type = COME_UP;
        mod_mode->mod = target_mod;
        mod_mode->state = COME_UP;
        status = csmgr_pub_msg(out->total_len, out);
        /* if in maintenance mode, go back to startup mode. */
        if (reason == MAINTENANCE) {
            operative_mode = startup_operative_mode;
            required_state = UP;
        }
        if (!csmgr_num_l3_mod_present()) {
            status = csmgr_publish_nl_status(ALL_MODULES, operative_mode, SUCCESS, NO_ERROR);
        }
        return(status);
    }

    /*
     * Bringing down the stack is top down (as per mod_layering_order[]).
     *
     * Timeout is the aggregate for all the modules to respond. If timeout occurs, we just reset
     * the "responded" count to MODCOUNT_INFINITY to account for late arrivals. This while not fool proof,
     * will avoid basic counting errors after a timeout. It is always reset when we start a new transaction.
     * The timeouts are sufficiently large that such time outs will rarely occur and will not cause any serious
     * side effects.
     *
     * It is either a single module or all the modules (presently only ALL_MODULES is officially supported).
     *
     * NOTE(review): the deadline is computed once, before the loop, so every module in an
     * ALL_MODULES run shares it - confirm that this is the intended meaning of "aggregate".
     */

    required_state = DOWN;
    time_to_wait.tv_sec = time(NULL) + module_response_timeout;
    num_go_down_response_recv = 0;

    for (ind = 0; ind < num_mods; ind++) {
        Module cur = mod_layering_order[ind];

        if ((target_mod != ALL_MODULES) && (target_mod != cur)) continue;

        if (!csmgr_is_mod_registered(cur)) {
            continue;
        }

        INFOLOG("Send going down module %s\n", mod_id_to_str(cur));

        csmgr_set_mod_state(cur, reason, GO_DOWN);

        /*
         * Take the lock before publishing: the responder signals with the lock held, so
         * the final response cannot be signalled before we are actually waiting.
         */

        pthread_mutex_lock(&lock);

        num_go_down_response_recv = 0;
        mod_id_in_process = cur;

        m->type = GO_DOWN;
        mod_mode->mod = cur;
        mod_mode->state = GO_DOWN;
        (void)csmgr_pub_msg(out->total_len, out);   /* failure is logged by csmgr_pub_msg(). */

        /* wait till everyone responds or timeout occurs (loop guards against spurious wakeups). */

        ret = 0;
        while ((ret == 0) && (num_go_down_response_recv < num_go_down_response_required)) {
            ret = pthread_cond_timedwait(&cond, &lock, &time_to_wait);
        }

        if (ret != 0) {
            csmgr_set_mod_state(cur, reason, DOWN);  /* mark it as down anyway. */
            /* pthread functions return the error number; they do not set errno. */
            ERRLOG("%s: cond timeout error (%d) Expected %d responses, received %d (%s)\n",
                   mod_id_to_str(cur), ret, num_go_down_response_required,
                   num_go_down_response_recv, strerror(ret));
            if (ret == ETIMEDOUT) {
                num_go_down_response_recv = MODCOUNT_INFINITY;
            }
        }

        pthread_mutex_unlock(&lock);

        INFOLOG("Send down module 0x%x %s\n", cur, mod_id_to_str(cur));
        m->type = DOWN;
        mod_mode->state = DOWN;
        (void)csmgr_pub_msg(out->total_len, out);

        if (target_mod != ALL_MODULES) break;     /* only one or all. */
    }

    return(true);
}


/*
 * csmgr_pub_msg
 *
 * Sends len bytes starting at m on the publish socket without blocking.
 *
 * Returns true when exactly len bytes were accepted. Otherwise the failure is
 * logged, the publish channel is re-created when zmq_socket_restart_on_error is
 * set, and false is returned (the message itself is not retried).
 */

bool
csmgr_pub_msg (int len, void *m) {
    int sent = zmq_send(pub_socket, m, len, ZMQ_DONTWAIT);

    if (sent != len) {
        ERRLOG("failed publish len %d (send %d) %s\n", len, sent, zmq_strerror(errno));

        if (zmq_socket_restart_on_error) {
            ERRLOG("restarting publish channel");
            csmgr_create_pub_channel();
        }

        return(false);
    }

    return(true);
}

/*
 * csmgr_send_nack_response
 *
 * Stub NACK to complete the REQ-RESP cycle for REQ errors.
 *
 *   comm_socket - socket the reply is sent on.
 *   req         - offending request (not examined).
 *   resp_len    - not written.
 *   buf_len/buf - scratch space for the reply. Handlers call this for the
 *                 "null buffer" error too, so when buf is NULL or too small a
 *                 local buffer is used instead of dereferencing it.
 *
 * Returns true when the whole NACK was sent, false (after logging) otherwise.
 */

bool
csmgr_send_nack_response (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    char fallback[sizeof(msg_pkg) + sizeof(msg)];
    msg_pkg *out;
    msg *resp;
    int send;

    (void)req;
    (void)resp_len;

    if ((!buf) || (buf_len < (int)sizeof(fallback))) {
        buf = fallback;
    }

    out = (msg_pkg *)buf;
    resp = (msg *)out->entry;

    resp->type = NACK;
    resp->len = sizeof(*resp);

    out->total_len = sizeof(*out) + resp->len;

    send = zmq_send(comm_socket, out, out->total_len, 0);

    if (send != out->total_len) {
        ERRLOG("failed send nack %d bytes (send %d): %s\n", out->total_len, send, zmq_strerror(errno));
        return(false);
    }

    return(true);
}

/*
 * csmgr_process_keepalive_response
 *
 * Records the sequence number of a module's keepalive response and replies
 * with an ACK.
 *
 * The module id comes straight from the received message and is used to index
 * registered_mods[], so it is range-checked first; an out-of-range id is
 * answered with a NACK.
 *
 * Returns true when the reply was sent in full, false otherwise.
 */

bool
csmgr_process_keepalive_response (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    keepalive_response *ka_r = (keepalive_response *)req->data;
    module_status *mod_status = &(ka_r->mod_status);
    Module mod_id = mod_status->mode.mod;

    if (mod_id >= MAX_MODULES) {
        ERRLOG("%s: invalid mod id (%d)\n", __FUNCTION__, mod_id);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    /* store the last keepalive response. */

    registered_mods[mod_id].last_keepalive = ka_r->seq;

    if (debug_csmgrd) {
        DLOG("%s: keepalive from %s (0x%x), seq %d\n", __FUNCTION__, mod_id_to_str(mod_id), mod_id, ka_r->seq);
    }

    /* send back ack */

    msg_pkg *out = (msg_pkg *)buf;
    msg *resp = (msg *)out->entry;

    resp->type = ACK;
    resp->len = sizeof(*resp);   /* nothing else in this. */

    out->total_len = sizeof(*out) + resp->len;

    int send;

    send = zmq_send(comm_socket, out, out->total_len, 0);

    if (send != out->total_len) {
        ERRLOG("%d failed send len %d (%s)\n", send, out->total_len, zmq_strerror(errno));
        return(false);
    }

    return(true);
}

/*
 * csmgr_process_module_status_response
 *
 * Handles a module's answer to the discovery probe: ACKs it and, when the
 * module is not yet registered, records it with the mode and state it reported.
 *
 * Returns the NACK result for an out-of-range module id, false when the ACK
 * could not be sent in full, and true otherwise.
 */

bool
csmgr_process_module_status_response (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    module_status_response *reported = (module_status_response *)(req->data);
    Module mod = reported->mode.mod;
    msg_pkg *out;
    msg *ack;
    int sent;

    if (mod >= MAX_MODULES) {
        ERRLOG("%s: invalid mod id (%d)\n", __FUNCTION__, mod);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    INFOLOG("module status response from %s (0x%x) Mode %s State %s\n", mod_id_to_str(mod), mod,
            mode_to_str(reported->mode.mode), mod_state_to_str(reported->mode.state));

    /* the reply is a bare ACK. */

    out = (msg_pkg *)buf;
    ack = (msg *)out->entry;

    ack->type = ACK;
    ack->len = sizeof(*ack);
    out->total_len = sizeof(*out) + ack->len;

    sent = zmq_send(comm_socket, out, out->total_len, 0);

    if (sent != out->total_len) {
        ERRLOG("failed send ack %d bytes send %d, %s\n", out->total_len, sent, zmq_strerror(errno));
        return(false);
    }

    /*
     * Only an unregistered module needs recording: it was loaded before csmgrd, and csmgrd
     * has come up after it, perhaps through its own restart.
     */

    if (!csmgr_is_mod_registered(mod)) {
        registered_mods[mod].mod_status.mode.mod = mod;
        registered_mods[mod].mod_status.mode.state = reported->mode.state;
        registered_mods[mod].mod_status.mode.mode = reported->mode.mode;

        /*
         * switchD is trusted to give us back the state after our restart (useful when csmgrd
         * restarts while in maintenance mode). A MAINTENANCE report does not overwrite the
         * startup mode.
         */

        if (mod == SWITCHD) {
            operative_mode = reported->mode.mode;
            required_state = reported->mode.state;
            if (reported->mode.mode != MAINTENANCE) {
                startup_operative_mode = operative_mode;
            }
        }
    }

    return(true);
}

/*
 * csmgr_process_go_down_response
 *
 * Handles a module's acknowledgement of a GO_DOWN broadcast: ACKs it, counts it
 * and wakes the thread blocked in csmgr_admin_modules() once every expected
 * response has arrived.
 *
 * When a module acknowledges its own GO_DOWN, its recorded state becomes DOWN,
 * or FAILURE if it reported a failure reason.
 *
 * The response counter and the "module in process" id are shared with the
 * waiting thread, so they are examined and updated with the lock held.
 *
 * Returns false only when the ACK could not be sent in full.
 */

bool
csmgr_process_go_down_response (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    module_down_status *mod_down_status = (module_down_status *)req->data;
    Module for_mod_id = mod_down_status->mod;
    Module from_mod_id = mod_down_status->mode.mod;
    State state;
    bool delayed;
    int rc = 0;

    INFOLOG("GOING_DOWN ack from %s for %s (%d of %d)", mod_id_to_str(from_mod_id), mod_id_to_str(for_mod_id),
            num_go_down_response_recv + 1, num_go_down_response_required);

    if (for_mod_id == from_mod_id) {
        state = (mod_down_status->failure_reason == NO_ERROR) ? DOWN : FAILURE;
        csmgr_set_mod_state(for_mod_id, operative_mode, state);
    }

    /* send back ack */

    msg_pkg *out = (msg_pkg *)buf;
    msg *resp = (msg *)out->entry;

    resp->type = ACK;
    resp->len = sizeof(*resp);   /* nothing else in this. */

    out->total_len = sizeof(*out) + resp->len;

    int send;

    send = zmq_send(comm_socket, out, out->total_len, 0);

    if (send != out->total_len) {
        ERRLOG("failed send len %d (send %d) %s\n", out->total_len, send, zmq_strerror(errno));
        return(false);
    }

    pthread_mutex_lock(&lock);

    /* Ignore if a response comes after timeout has expired. */

    delayed = (for_mod_id != mod_id_in_process);

    /* wake up the blocked thread if all have responded. */

    if (!delayed && (++num_go_down_response_recv >= num_go_down_response_required)) {
        rc = pthread_cond_signal(&cond);
    }

    pthread_mutex_unlock(&lock);

    if (delayed) {
        INFOLOG("%s: delayed going_down response\n", mod_id_to_str(for_mod_id));
        return(true);
    }

    if (rc != 0) {
        /* pthread functions return the error number rather than setting errno. */
        ERRLOG("%s: *error %d in sending signal %s\n", __FUNCTION__, rc, strerror(rc));
    }

    return(true);
}

/*
 * csmgr_process_load_complete
 *
 * First call by the module. Marks the module as LOAD_COMPLETE and responds with
 * two entries: MODE_INFO (operative mode and required state) followed by
 * MOD_LIST (ids of the other modules whose state is UP).
 *
 *   resp_len - receives the total length of the reply.
 *   buf_len  - capacity of buf; the module list is truncated rather than
 *              written past it.
 *
 * Returns the NACK result for an invalid module id, a null buffer or a buffer
 * too small for the fixed part of the reply; false when the reply could not be
 * sent in full; true otherwise.
 */

bool
csmgr_process_load_complete (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    module_status *mod_status = (module_status *)req->data;
    Module mod = mod_status->mode.mod;
    /* package header + MODE_INFO entry + header of the MOD_LIST entry. */
    const int fixed_len = (int)(sizeof(msg_pkg) + 2 * sizeof(msg) + sizeof(module_mode));
    int max_listed;

    /* validate before the id is used for anything, including logging. */

    if ((mod >= MAX_MODULES) || (!buf)) {
        ERRLOG("%s invalid mod id (%d) or null buffer\n", __FUNCTION__, mod);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    if (buf_len < fixed_len) {
        ERRLOG("%s: response buffer too small (%d bytes, need %d)\n", __FUNCTION__, buf_len, fixed_len);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    max_listed = (buf_len - fixed_len) / (int)sizeof(Module);

    INFOLOG("%s: load complete from %s (0x%x)\n", __FUNCTION__, mod_id_to_str(mod), mod);

    /* this should come at init or when module restarts or comes out of maintenance mode. */

    registered_mods[mod].mod_status.mode.mod = mod;
    registered_mods[mod].mod_status.mode.state = LOAD_COMPLETE;
    registered_mods[mod].mod_status.mode.mode = operative_mode;

    /* send the start up mode and the list of modules that are already up. */

    msg_pkg *out = (msg_pkg *)buf;
    msg *resp = (msg *)out->entry;
    module_mode *mod_mode = (module_mode *)resp->data;

    out->total_len = sizeof(msg_pkg) + sizeof(msg) + sizeof(module_mode); /* more added later. */
    resp->type = MODE_INFO;
    resp->len = sizeof(msg) + sizeof(module_mode);

    mod_mode->mod = mod;
    mod_mode->mode = operative_mode;
    mod_mode->state = required_state;

    /* add the list of modules that are already up. */

    resp = (msg *)((char *)resp + resp->len);

    resp->type = MOD_LIST;

    Module mods;
    int num_mods = 0;
    Module *mod_list = (Module *)resp->data;

    for (mods = 0; mods < MAX_MODULES; mods++) {
        if (mods == mod) continue;
        if (registered_mods[mods].mod_status.mode.state != UP) continue;
        if (num_mods >= max_listed) {
            ERRLOG("%s: response buffer full, module list truncated at %d\n", __FUNCTION__, num_mods);
            break;
        }
        mod_list[num_mods++] = registered_mods[mods].mod_status.mode.mod;
    }

    INFOLOG("%d modules already up\n", num_mods);

    resp->len = sizeof(msg) + num_mods * sizeof(Module);

    out->total_len += resp->len;

    *resp_len = out->total_len;

    int send;

    send = zmq_send(comm_socket, out, out->total_len, 0);

    if (send != out->total_len) {
        ERRLOG("%d failed send len %d (%s)\n", send, out->total_len, zmq_strerror(errno));
        return(false);
    }

    return(true);
}

/*
 * csmgr_process_init_complete
 *
 * Handles a module's "init complete": records the module with the currently
 * required state, replies with an UP message carrying the operative mode and
 * required state, and publishes the same message to the subscribers.
 *
 * Returns the NACK result for an invalid module id or null buffer, false when
 * the reply could not be sent in full, and otherwise the result of the last
 * publish / timer operation performed.
 */

bool
csmgr_process_init_complete (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    module_status *reported = (module_status *)(req->data);
    Module mod = reported->mode.mod;
    msg_pkg *out;
    msg *resp;
    module_status *reply;
    int sent;
    bool status;

    if ((mod >= MAX_MODULES) || (!buf)) {
        ERRLOG("%s: invalid mod id (%d) or null buffer\n", __FUNCTION__, mod);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    /* the module has initialized (TBD: handle failure cases). */

    INFOLOG("%s: init complete from %s %s %s\n", __FUNCTION__, mod_id_to_str(mod),
            mode_to_str(reported->mode.mode), mod_state_to_str(reported->mode.state));

    registered_mods[mod].mod_status.mode.mod = mod;   /* should normally already be there. */
    registered_mods[mod].mod_status.mode.state = required_state;

    /* build the UP reply. */

    out = (msg_pkg *)buf;
    resp = (msg *)out->entry;
    reply = (module_status *)resp->data;

    resp->type = UP;
    resp->len = sizeof(msg) + sizeof(module_status);
    out->total_len = sizeof(msg_pkg) + resp->len;

    reply->mode.mod = mod;
    reply->mode.mode = operative_mode;
    reply->mode.state = required_state;   /* we may want to distribute failures too. */
    reply->failure_reason = NO_ERROR;

    sent = zmq_send(comm_socket, out, out->total_len, 0);

    if (sent != out->total_len) {
        ERRLOG("failed send %d bytes (send %d) %s\n", out->total_len, sent, zmq_strerror(errno));
        return(false);
    }

    /* broadcast module status. */

    status = csmgr_pub_msg(out->total_len, out);

    /*
     * Network layer follow-up. The network layer hierarchy (IGP vs EGP) is outside this scope.
     *  - an L3 module: republish the same message as NETWORK_LAYER_INFO.
     *  - a non-L3 module with an L3 module registered and up: broadcast the NL status now.
     *  - otherwise: arm the monitor timer once so the broadcast is made later.
     */

    if (csmgr_is_mod_l3(mod)) {
        resp->type = NETWORK_LAYER_INFO;
        return(csmgr_pub_msg(out->total_len, out));
    }

    if (csmgr_num_l3_mod_present() && csmgr_num_mod_l3_up()) {
        return(csmgr_broadcast_nl_status());
    }

    if (broadcast_l3_up == false) {
        broadcast_l3_up = true;
        return(csmgr_arm_monitor_timer(1, CSMGR_MODULE_UP_POLL_INTERVAL));
    }

    return(status);
}

/*
 * csmgr_process_module_registration
 *
 * Handles MODULE_REGISTER and (for any other request type routed here)
 * unregistration, then replies with an ACK.
 *
 * Registering stores the module id in its own slot and sets the slot state to
 * MODULE_REGISTER; unregistering stores 0 as the module id. Both must stay in
 * step with csmgr_is_mod_registered().
 *
 * Returns the NACK result for an out-of-range module id, false when the ACK
 * could not be sent in full, and true otherwise.
 */

bool
csmgr_process_module_registration (void *comm_socket, msg *req, int *resp_len, int buf_len, void *buf) {
    module_connect *conn = (module_connect *)req->data;
    Module mod_id = conn->mod;
    bool registering = (req->type == MODULE_REGISTER);
    msg_pkg *out;
    msg *ack;
    int sent;

    if (mod_id >= MAX_MODULES) {
        ERRLOG("Invalid Module 0x%x", mod_id);
        return(csmgr_send_nack_response(comm_socket, req, resp_len, buf_len, buf));
    }

    DLOG("%s: %s from %s (0x%x)\n", __FUNCTION__, registering ? "Register" : "Unregister",
         mod_id_to_str(mod_id), mod_id);

    if (!registering) {
        registered_mods[mod_id].mod_status.mode.mod = 0;
    } else {
        registered_mods[mod_id].mod_status.mode.mod = mod_id;   /* this means it is there. */
        registered_mods[mod_id].mod_status.mode.state = MODULE_REGISTER;
    }

    /* the reply is a bare ACK. */

    out = (msg_pkg *)buf;
    ack = (msg *)out->entry;

    ack->type = ACK;
    ack->len = sizeof(*ack);
    out->total_len = sizeof(*out) + ack->len;

    sent = zmq_send(comm_socket, out, out->total_len, 0);

    if (sent != out->total_len) {
        ERRLOG("failed send len %d (send %d) %s\n", out->total_len, sent, zmq_strerror(errno));
        return(false);
    }

    return(true);
}

bool
csmgr_process_message (void *comm_socket, int rcv_len, void *recv) {
    int len, i = 0;
    msg_pkg *in = (msg_pkg *)recv;
    msg *entry;
    bool status = true;
    char resp[MAX_MSG_LEN];
    int resp_len, off = 0;
    
    if (!recv) {
        return(false);
    }

    len = in->total_len - sizeof(msg_pkg);
    entry = (msg *)in->entry;

    /* There can be multiple messages - we have only one now. */

    while (i < len) {
        switch (entry->type) {
	     case MODULE_REGISTER:
	         status = csmgr_process_module_registration(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
	         break;
	    case MODULE_UNREGISTER:
	        status = csmgr_process_module_registration(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
	        break;
            case LOAD_COMPLETE:
                status = csmgr_process_load_complete(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
                break;
            case INIT_COMPLETE:
                status = csmgr_process_init_complete(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
                break;
            case KEEP_ALIVE_RESP:
                status = csmgr_process_keepalive_response(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
                break;
            case GO_DOWN:
                status = csmgr_process_go_down_response(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
                break;
	    case MODULE_STATUS_RESP:
	        status = csmgr_process_module_status_response(comm_socket, entry, &resp_len, sizeof(resp) - off, resp + off);
		break;
            default:
                INFOLOG("%s:%d unknown request 0x%x len\n", __FUNCTION__, entry->type, entry->len);
        }
        i += entry->len;
        off += resp_len;
        entry = (msg *)((char *)entry + entry->len);
    }

    return(status);
}

/*
 * csmgr_comm_thread
 *
 * Thread body for the request/reply channel: receives packages on comm_socket
 * and hands them to csmgr_process_message() for as long as comm_thread_active
 * stays true. A receive error is logged and, when zmq_socket_restart_on_error
 * is set, the comm channel is re-created before the next receive.
 *
 * Returns a non-null pointer when the loop ends.
 */

void *
csmgr_comm_thread (void *nothing) {
    char inbound[MAX_MSG_LEN];
    int nbytes;

    comm_thread_active = true;

    /* listen and process messages (the thread is always active). */

    while (comm_thread_active) {
        nbytes = zmq_recv(comm_socket, inbound, sizeof(inbound), 0);

        if (nbytes != -1) {
            csmgr_process_message(comm_socket, nbytes, inbound);
            continue;
        }

        ERRLOG("%s: comm_thread receive error errno %d %s\n", __FUNCTION__, errno, zmq_strerror(errno));

        if (zmq_socket_restart_on_error) {
            ERRLOG("restarting comm channel");
            csmgr_create_comm_channel();   /* will close the present one before creating the new one. */
        }
    }

    INFOLOG("Exiting comm receive thread");

    return((void *)1);   /* just non null. */
}

/*
 * csmgr_destroy_pub_channel
 *
 * Closes the publish socket and destroys its context, socket first. Each handle
 * is reset to NULL once released, so calling this again is a no-op.
 */

void
csmgr_destroy_pub_channel (void) {
    if (pub_socket != NULL) {
        zmq_close(pub_socket);
        pub_socket = NULL;
    }

    if (pub_context != NULL) {
        zmq_ctx_destroy(pub_context);
        pub_context = NULL;
    }
}

/*
 * csmgr_create_pub_channel
 *
 * (Re)creates the publish channel: any existing context/socket is torn down
 * first, then a new context and PUB socket are created and the socket is bound
 * and connected to CSMGR_PUB_SUB_CHAN.
 *
 * Returns true on success. On failure the error is logged, whatever was created
 * is destroyed again and false is returned.
 */

int
csmgr_create_pub_channel (void) {
    if (pub_context || pub_socket) {
        csmgr_destroy_pub_channel();
    }

    pub_context = zmq_ctx_new();

    if (pub_context == NULL) {
        ERRLOG("failed pub context %s\n", zmq_strerror(errno));
        return(false);
    }

    /* each step runs only if the previous one succeeded; the first failure falls through to cleanup. */

    pub_socket = zmq_socket(pub_context, ZMQ_PUB);

    if (pub_socket == NULL) {
        ERRLOG("failed pub zmq_socket %s\n", zmq_strerror(errno));
    } else if (zmq_bind(pub_socket, CSMGR_PUB_SUB_CHAN) != 0) {
        ERRLOG("failed pub zmq_bind %s\n", zmq_strerror(errno));
    } else if (zmq_connect(pub_socket, CSMGR_PUB_SUB_CHAN) != 0) {
        ERRLOG("failed pub zmq_connect %s\n", zmq_strerror(errno));
    } else {
        return(true);
    }

    csmgr_destroy_pub_channel();

    return(false);
}

/*
 * csmgr_destroy_comm_channel
 *
 * Closes the request/reply socket and destroys its context, socket first.
 *
 * Each handle is reset to NULL once released (as csmgr_destroy_pub_channel()
 * does), so a failed re-create cannot leave stale handles behind to be closed
 * a second time or used by the comm thread.
 */

void
csmgr_destroy_comm_channel (void) {
    if (comm_socket) {
        zmq_close(comm_socket);
        comm_socket = NULL;
    }

    if (comm_context) {
        zmq_ctx_destroy(comm_context);
        comm_context = NULL;
    }
}

/*
 * csmgr_set_comm_socket_parms
 *
 * Sets the comm socket connection mode - Ignore req-resp mismatches (ZMQ_REQ_RELAXED, together
 * with ZMQ_REQ_CORRELATE), no wait on close (ZMQ_LINGER set to 0).
 *
 * Note: Unlike client side, there is no timeout (wait for request is a separate thread).
 *
 * The results of the individual option calls are not checked.
 * NOTE(review): the ZMQ_REQ_* options are documented for REQ sockets, while the caller in this
 * file passes a REP socket - confirm whether these two calls have any effect.
 *
 * Returns false for a null socket, true otherwise.
 */

bool
csmgr_set_comm_socket_parms (void *comm_sock) {
    int enable = 1;
    int no_linger = 0;

    if (comm_sock == NULL) {
        return(false);
    }

    zmq_setsockopt(comm_sock, ZMQ_REQ_CORRELATE, &enable, sizeof(enable));
    zmq_setsockopt(comm_sock, ZMQ_REQ_RELAXED, &enable, sizeof(enable));
    zmq_setsockopt(comm_sock, ZMQ_LINGER, &no_linger, sizeof(no_linger));

    return(true);
}

/*
 * csmgr_create_comm_channel
 *
 * Create the resp end of the comm channel: any existing context/socket is torn
 * down first, then a new context and REP socket are created, the socket options
 * are applied and the socket is bound to CSMGR_COMM_CHAN.
 *
 * Returns true on success. On failure the error is logged, the channel is
 * destroyed again and false is returned.
 */

int
csmgr_create_comm_channel (void) {

    if (comm_context || comm_socket) {
        csmgr_destroy_comm_channel();
    }

    comm_context = zmq_ctx_new ();

    if (comm_context == NULL) {
        ERRLOG("failed comm context %s\n",  zmq_strerror(errno));
        return(false);
    }

    comm_socket = zmq_socket(comm_context, ZMQ_REP);

    if (comm_socket == NULL) {
        ERRLOG("failed comm zmq_socket %s\n", zmq_strerror(errno));
    } else {
        csmgr_set_comm_socket_parms(comm_socket);

        if (zmq_bind(comm_socket, CSMGR_COMM_CHAN) == 0) {
            return(true);
        }

        ERRLOG("failed comm bind %s\n", zmq_strerror(errno));
    }

    /* socket creation or bind failed. */

    csmgr_destroy_comm_channel();

    return(false);
}

/*
 * csmgr_discover_modules
 *
 * For a csmgr restart (without other module change), it will attempt to rediscover the existing modules with a
 * MODULE_STATUS_REQ on the pub channel. If no one replies, it is the first module to start (by design at boot).
 * If at least one module replies, csmgrd has been restarted for some reason.
 *
 * The probe has no body and is sent without blocking; a failed send is only logged.
 */

void
csmgr_discover_modules (void) {
    char probe[MAX_MSG_LEN];
    msg_pkg *pkg = (msg_pkg *)probe;
    msg *entry = pkg->entry;
    int sent;

    entry->type = MODULE_STATUS_REQ;
    entry->len = sizeof(*entry);    /* no body. */
    pkg->total_len = sizeof(*pkg) + entry->len;

    sent = zmq_send(pub_socket, probe, pkg->total_len, ZMQ_DONTWAIT);

    if (sent == pkg->total_len) {
        return;
    }

    ERRLOG("failed to send discovery probe %s\n", zmq_strerror(errno));
}

/* Timer spec most recently programmed by csmgr_arm_monitor_timer(). */
struct itimerspec monitor_ts;
/* Handle filled in by timer_create(); 0 is treated as "monitor timer not created yet". */
timer_t monitor_timer_id = 0;
/* NOTE(review): mutex/cond are not referenced in this part of the file - presumably for monitor signalling; confirm. */
pthread_mutex_t monitor_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t monitor_cond = PTHREAD_COND_INITIALIZER;

/*
 * csmgr_arm_monitor_timer
 *
 * Sets up the timer for the periodic background tasks.
 *
 * Note: At init time, it may be called before the timer is provisioned and will be ignored. The events will be handled
 * in the monitor thread before starting the background timer.
 */

bool
csmgr_arm_monitor_timer (int start_delay, int interval) {
    bool armed = true;

    /*
     * start_delay: seconds until the first expiry; interval: seconds between later expiries.
     * Returns false only when timer_settime() fails. With no timer created yet there is nothing
     * to program and the call reports success.
     */

    if (monitor_timer_id != 0) {
        monitor_ts.it_interval.tv_sec = interval;
        monitor_ts.it_interval.tv_nsec = 0;
        monitor_ts.it_value.tv_sec = start_delay;
        monitor_ts.it_value.tv_nsec = 1;    /* keeps it_value non zero even for a zero start_delay. */

        if (timer_settime(monitor_timer_id, 0, &monitor_ts, NULL) == -1) {
            ERRLOG("Could not set timer delay %d interval %d (%s)\n", start_delay, interval, strerror(errno));
            armed = false;
        }
    }

    return(armed);
}

/*
 * csmgr_monitor_timer_thread
 *
 * Periodic background thread for keepalives. If we are here for the deferred network_up broadcast (which reduced the
 * timer interval), reset the timer to the keepalive interval. One cycle will be affected but the accuracy is not that
 * critical (yet). We don't want a premature keepalive - presumably, the nl_status will need actions to be performed.
 */

void
csmgr_monitor_timer_thread (union sigval arg) {
    int send;
    /*
     * Zero the buffer up front: only total_len, type, len and seq are filled in below, yet every byte up to
     * total_len is put on the wire, so any other field or padding must not carry stale stack contents.
     */
    char req[MAX_MSG_LEN] = {0};
    msg_pkg *m = (msg_pkg *)req;
    msg *entry = m->entry;
    keepalive_request *keepalive_req = (keepalive_request *)entry->data;

    (void)arg;    /* the timer payload is not used. */

    /*
     * If we are here for the delayed nl broadcast, do that and reset the keepalive timer. The flag was set
     * only if there were no active L3 at that time. But an L3 could have started after that, in which case
     * the broadcast will come from the up for that L3 and is skipped here.
     */

    if (broadcast_l3_up) {
        broadcast_l3_up = false;
        if (!csmgr_num_l3_mod_present()) {
            (void)csmgr_broadcast_nl_status();
        }
        csmgr_arm_monitor_timer(1, keepalive_interval); /* will stagger the timing for next cycle. */
        return;    /* no keepalive on this expiry. */
    }

    /* Regular expiry: publish one keepalive request carrying the next sequence number. */

    entry->len = sizeof(*entry) + sizeof(*keepalive_req);
    entry->type = KEEP_ALIVE_REQ;

    m->total_len = sizeof(*m) + entry->len;

    keepalive_req->seq = keepalive_seq_no++;

    send = zmq_send(pub_socket, req, m->total_len, ZMQ_DONTWAIT);
    if (send != m->total_len) {
        ERRLOG("failed to send keepalive seq %d %s\n", keepalive_seq_no - 1, zmq_strerror(errno));
    } else if (debug_csmgrd) {
        DLOG("send keep alive seq %d\n", keepalive_seq_no - 1);
    }
}

/*
 * csmgr_create_monitor_timer
 *
 * Timer used to trigger actions in the monitor process. The times are in seconds (there is no need for hyper accuracy
 * for most of these events.)
 */

bool
csmgr_create_monitor_timer (int start_delay, int interval)
{
    struct sigevent se;

    /*
     * Set the sigevent structure to cause the signal to be
     * delivered by creating a new thread.
     */

    se.sigev_notify = SIGEV_THREAD;
    se.sigev_value.sival_ptr = &monitor_timer_id;
    se.sigev_notify_function = csmgr_monitor_timer_thread;
    se.sigev_notify_attributes = NULL;

    if (timer_create(CLOCK_REALTIME, &se, &monitor_timer_id) == -1) {
        ERRLOG("Could not create timer %s\n", strerror(errno));
	return(false);
    }

    return(csmgr_arm_monitor_timer(start_delay, interval));
}

/*
 * csmgr_wait_for_init_complete
 *
 * Delay the start of the background tasks, allow more modules to initialize (so we know if there are L3 modules or not).
 * Do a round of discovery if csmgrd has restarted (modules will not re-register with the restarted csmgrd).
 *
 */

void
csmgr_wait_for_init_complete (void) {
    int mods_up, i = 0;

    /* 
     * wait for L2 modules to come up or timeout. If we just finish L2, give L3 one more shot. (Equals should 
     * suffice - we are being liberal here,) If not, assume no L3 - basically other modules (clagd now)
     * can move on. However, if an L3 module is present, we have to let it finish (INIT_COMPLETE).
     * 
     * CSMgr may have restarted - try a self discovery.
     */

    mods_up = csmgr_get_num_active_modules();
    
    while ((mods_up < CSMGR_NUM_L2_MODULES) && (i++ < CSMGR_MODULE_UP_POLL)) {
        sleep(CSMGR_MODULE_UP_POLL_INTERVAL);
	mods_up = csmgr_get_num_active_modules();
	if (!mods_up) {
	    csmgr_discover_modules();
	}
    }

    if (mods_up <= CSMGR_NUM_L2_MODULES) {
        sleep(CSMGR_MODULE_UP_POLL_INTERVAL);
    }

    if (!csmgr_num_l3_mod_present()) {
        INFOLOG(" No L3 modules, (%d registered modules)", csmgr_get_num_active_modules())
        (void)csmgr_publish_nl_status(ALL_MODULES, operative_mode, SUCCESS, NO_ERROR);
    }

    /*
     * This is init time. Either no l3 modules are present (above) or L2 modules have come up before L3 modules =>
     * they received the NETWORK_LAYER_INFO when the L3 module came up.
     */

    broadcast_l3_up = false; 
}

/*
 * csmgr_create_background_thread
 *
 * Blocks in csmgr_wait_for_init_complete() and then starts the periodic monitor timer with the initial
 * keepalive delay and the current keepalive interval. Returns true on success, false if the timer
 * could not be set up.
 */

int
csmgr_create_background_thread (void) {
    bool timer_started;

    csmgr_wait_for_init_complete();

    timer_started = csmgr_create_monitor_timer(DEFAULT_KEEPALIVE_INIT_DELAY, keepalive_interval);
    if (timer_started) {
        return(true);
    }

    ERRLOG("failed to create background timer thread: %s\n", strerror(errno));
    return(false);
}

int
csmgr_start_channels (void) {

    /* open the pub channel */

    if (!csmgr_create_pub_channel()) {
        return(false);
    }

    /* open the REQ-RESP comm channel */

    if (!csmgr_create_comm_channel()) {
        return(false);
    }

    /* start the listener thread */

    if (pthread_create(&comm_thread, NULL, csmgr_comm_thread,  NULL)) {
        ERRLOG("error in comm pthread creation %s\n", strerror(errno));
        return(false);
    }

    return(true);
}

/*
 * log_exit
 *
 * Exit hook (registered with atexit() in main): logs the shutdown and reports STOPPING=1 to systemd.
 */

static void
log_exit (void) {
    INFOLOG("CS manager exiting.");

    /* Let systemd know the service is on its way down. */
    sd_notify(0, "STOPPING=1");
}


/*
 * main
 *
 * Daemon entry point: parses options (-d daemonize, -l <loglevel>), takes an exclusive lock on the pid
 * file so only one instance runs, optionally daemonizes, records the PID, starts the channels and the CLI,
 * notifies systemd, starts the background timer and then waits for the comm thread to finish.
 *
 * Returns 0 after a clean shutdown and a non-zero status on any startup or runtime failure.
 */
int main(int argc, char *argv[])
{
    bool daemonize = false;
    int ch;
    int my_pid = -1;    /* stays -1 when the pid file holds no readable PID. */
    int lock_errno;
    char loglevel[16] = "INFO";

    while ((ch = getopt(argc, argv, "dhc:l:")) != -1) {
        switch(ch) {
            case 'd':
                daemonize = true;
                break;
            case 'l':
                /*
                 * The zero-filled initializer plus the size - 1 bound keeps the buffer terminated.
                 * NOTE(review): loglevel is captured here but not consumed anywhere else in main - confirm intent.
                 */
                strncpy(loglevel, optarg, sizeof(loglevel) - 1);
                break;
            default:
                exit(0);
        }
    }

    /*
     * Check to see if another csmgrd is running.
     */
    if ((pid_fp = fopen(CSMGR_PIDFILE, "a+")) == NULL) {
        ERRLOG("Couldn't open pid file %s\n", CSMGR_PIDFILE);
        exit (1);
    }

    /* Use file locking to ensure we're the only instance */
    /* XXX: Use lockf() if ported to other platforms */
    /* Using flock as it works across daemon() call */
    if (flock(fileno(pid_fp), LOCK_EX | LOCK_NB) < 0) {
        lock_errno = errno;    /* capture before fscanf() can overwrite it. */
        if (fscanf(pid_fp, "%d", &my_pid) != 1) {
            my_pid = -1;
        }
        ERRLOG("Another instance of csmgr ? (PID=%d, err=%s)\n",
                my_pid, strerror(lock_errno));
        exit(1);
    }

    atexit(log_exit);

    if (daemonize) {
        int retval = daemon(0, 0);
        if (retval != 0) {
            ERRLOG("daemon call failed with rv %d", retval);
            return 1;
        }
    }

    /* The PID will now be right */
    if (ftruncate(fileno(pid_fp), 0) != 0) {
        ERRLOG("Couldn't truncate pid file %s (%s)\n", CSMGR_PIDFILE, strerror(errno));
    }
    fprintf(pid_fp, "%d\n", getpid());
    fflush(pid_fp);

    startup_operative_mode = operative_mode = read_csmgr_state(ALL_MODULES);
    INFOLOG("CS manager starting in %s mode\n", mode_to_str(operative_mode));
    required_state = UP;

    /* Every failure below exits non-zero; returning false here would read as exit status 0 (success). */

    if (!csmgr_start_channels()) {
        ERRLOG("failed to start channels %s %s\n", __FUNCTION__, strerror(errno));
        return 1;
    }

    csmgr_start_cli();

    sd_notify(0, "READY=1"); /*  notify systemd that we are ready */

    /* periodic monitoring thread. */

    if (!csmgr_create_background_thread()) {
        return 1;
    }

    /* Block here for the lifetime of the comm thread. */
    if (pthread_join(comm_thread, (void **)NULL)) {
        return 1;
    }

    /* clean up before exit. */

    csmgr_destroy_comm_channel();

    timer_delete(monitor_timer_id);

    csmgr_stop_cli();

    csmgr_destroy_pub_channel();

    return 0;
}
