#! /usr/bin/python
#
# Copyright (C) 2017, 2019, 2020 Cumulus Networks, Inc.
#
# bmcd --
#
#     Interact with the BMC or BMCs
#

import os
import os.path
import sys
import optparse
import syslog
import exceptions
import traceback
import signal
import time
import select
import re
import urllib2
import json
import subprocess
import pickle
import pprint
import string
import types
import errno
import threading
from cumulus.multicast import *
from cumulus.vhwmon import *
import cumulus.platforms
import cumulus.sensors


# Defaults for the command-line options built in parse_args():
# multicast group and port, and the poll period in seconds.
default_mc_group = "ff12::5345"
default_mc_port = 0x5346
default_poll_period = 5


# Process-wide singletons.  daemon, platform, opts, vhwmon, sensors
# and bmc are assigned by init(); board is not assigned anywhere in
# the code visible here.
daemon = None
opts = None
platform = None
vhwmon = None
sensors = None
board = None
bmc = None    # local BMC


def main():
    '''
    Program entry point: initialize, run the event loop, then exit.

    RuntimeError is reported as a one-line error and any other
    exception is reported with its traceback; both give exit status 1.
    KeyboardInterrupt is treated as a normal shutdown (status 0).
    The dynamic sensors file is removed on the way out in all cases.
    '''
    def report(msg):
        # init() can fail before the Daemon object exists, in which
        # case there is no logger yet; fall back to stderr so the
        # original error is not masked by an AttributeError on None.
        if daemon is not None:
            daemon.err(msg)
        else:
            sys.stderr.write("%s\n" % msg)

    status = 0
    try:
        init()
        run()
    except RuntimeError as e:
        report(str(e))
        status = 1
    except KeyboardInterrupt:
        pass
    except Exception:
        report("unhandled exception: %s" % traceback.format_exc())
        status = 1
    finally:
        try:
            os.unlink(cumulus.sensors.dynamic_sensors_file)
        except OSError:
            # best effort: the file may never have been created
            pass
    if daemon is None:
        sys.exit(status)
    daemon.exit(status)

def init():
    '''
    Create the module-level singletons used by the rest of the file.

    The order matters: the daemon object is needed to parse the
    command line, and the parsed options are needed to start it.
    '''
    global daemon, platform, opts, vhwmon, sensors, bmc

    daemon = Daemon()
    platform = cumulus.platforms.probe()

    # the default config path is built from the daemon name
    opts = parse_args("/etc/cumulus/%s.conf" % daemon.name, None)
    daemon.start(opts.debug_level, opts.debug_throttle)

    vhwmon = VHWMon()
    sensors = Sensors(platform.use_dynamic_sensors)

    # bmc stays None on platforms that do not describe a BMC
    if platform.bmc:
        bmc = make_bmc(platform.bmc)

def parse_args(default_config, default_multicast_if):
    '''
    Parse the command line and return the optparse options object.

    default_config is accepted but not used by this function.
    The multicast options (-i, -g, -p) are only defined when
    default_multicast_if is set.

    Raises RuntimeError if positional arguments are left over.
    '''
    parser = optparse.OptionParser(usage = "Usage: %s [options]" % daemon.name)

    if default_multicast_if:
        parser.add_option(
            "-i", "--mc-if",
            dest = "mc_if", action = "store",
            default = default_multicast_if,
            help = "multicast interface (default %s)" % default_multicast_if)
        parser.add_option(
            "-g", "--mc-group",
            dest = "mc_group", action = "store",
            default = default_mc_group,
            help = "multicast group (default %s)" % default_mc_group)
        parser.add_option(
            "-p", "--mc-port",
            dest = "mc_port", action = "store",
            default = default_mc_port, type = "int",
            help = "port number (default %#x)" % default_mc_port)

    parser.add_option(
        "-r", "--poll-period",
        dest = "poll_period", action = "store",
        default = default_poll_period, type = "int",
        help = "poll period in seconds (default %d)" % default_poll_period)
    parser.add_option(
        "-d", "--debug-level",
        dest = "debug_level", action = "store",
        default = 0, type = "int",
        help = "set debug level")

    # the throttle default comes from the daemon object itself
    throttle = daemon.debug_throttle
    parser.add_option(
        "-t", "--debug-throttle",
        dest = "debug_throttle", action = "store",
        default = throttle, type = "int",
        help = "set debug throttle count (default %d)" % throttle)

    options, leftover = parser.parse_args()
    if leftover:
        raise RuntimeError("Extra command-line arguments: %s" %
                           " ".join(leftover))
    return options

def run():
    '''
    Register the periodic callbacks and enter the event loop.

    The BMC is always stopped on the way out, including when an
    exception unwinds through here.
    '''
    try:
        poll = Poll()
        period = opts.poll_period

        #
        # Callbacks are registered in the order they must run;
        # the Poll module guarantees that order is honored.
        #
        if board:
            poll.register_timer("board.query", board.query, (), period, 0)

        push_delay = 0
        if bmc:
            asynchronous = bmc.start()
            if asynchronous:
                # An asynchronous bmc query delivers its result at
                # least one cycle later, so hold back the push.
                push_delay = period
            poll.register_timer("bmc.query", bmc.query, (), period, 0)

        poll.register_timer("sensors.push", sensors.push, (), period,
                            push_delay)
        poll.register_timer("daemon.timer", daemon.timer, (), period, 0)

        poll.loop()  # never returns
    finally:
        if bmc:
            bmc.stop()

def make_bmc(platform_bmc, name = None):
    '''
    Build the BMC handler object for a platform BMC description.

    platform_bmc.type selects the handler class ("openbmc" or
    "ipmibmc"); platform_bmc and name are passed to its constructor.

    Raises RuntimeError if the type is not one of the known types.
    '''
    bmc_types = {
        "openbmc": BMC_OpenBMC,
        "ipmibmc": BMC_IpmiBMC,
    }
    bmc_class = bmc_types.get(platform_bmc.type)
    if bmc_class is None:
        # report the offending value, not the builtin named "type"
        raise RuntimeError("Unknown BMC type \"%s\"" % (platform_bmc.type,))
    return bmc_class(platform_bmc, name)

class BMC:
    '''
    Base class for BMC handlers.  The default implementation is
    synchronous and does nothing: query() is a no-op and no thread
    is ever started.
    '''

    def __init__(self, pbmc, name = None):
        # an explicit name wins over the one in the platform description
        self.name = name if name else pbmc.name
        self.ignores = pbmc.ignores
        self.renames = pbmc.renames

    def query(self):
        '''
        Query the BMC and populate the sensor database.
        Subclasses override this; the base version does nothing.
        '''
        return None

    def start(self):
        '''
        Start the query thread, for BMCs that need asynchronous
        queries.  Return True if a thread was started; the base
        version never starts one.
        '''
        return False

    def stop(self):
        '''
        Stop the query thread, for BMCs that need asynchronous
        queries.  Nothing to do in the base version.
        '''
        return None

class BMC_OpenBMC(BMC):
    '''
    Handle BMC-connected sensors.
    '''

    def __init__(self, pbmc, name = None):
        '''
        Build the two REST URLs from the BMC address (IPv6 preferred,
        otherwise IPv4) and set up the state shared with the query
        thread.  The thread itself is created by start().
        '''
        BMC.__init__(self, pbmc, name)
        if pbmc.ipv6_addr:
            # IPv6 literals must be bracketed inside a URL
            host = "[" + pbmc.ipv6_addr + "]"
        else:
            assert pbmc.ipv4_addr
            host = pbmc.ipv4_addr
        self.url = "http://%s:8080/api/sys/sensors-full" % host
        self.urlold = "http://%s:8080/api/sys/sensors" % host

        #
        # Asynchronous REST query thread
        #
        # We have to run the query in a thread because it can take
        # a very long time, much longer than our main loop period.
        # This is a typical sequence of events:
        #   query thread makes blocking http request
        #   main thread goes about its business
        #       which includes checking thread_result but it's None
        #   query thread gets the result and saves it in thread_result
        #   query thread waits on thread_cv until thread_result is None
        #       (see below for why it waits here)
        #   main thread (on next tick) sees thread_result is not None
        #       saves thread_result
        #       sets thread_result to None
        #       signals thread_cv
        #       (hopefully, query thread gets scheduled immediately)
        #   query thread wakes up, repeat (make http request...)
        #   main thread processes the result
        #   main thread goes on...
        #
        # The condition variable thread_cv protects thread_result.
        # In addition, the query thread waits on it when thread_result
        # is not None, and the main thread signals it whenever it
        # resets thread_result to None.  (This is like a reverse
        # producer-consumer model, in which the producer waits
        # for the consumer.)
        #
        # The thread has no other side effects.  It does not log
        # or raise exceptions.
        #
        # Compared with synchronous queries, there is an additional
        # delay of less than a tick, between finishing the query and
        # publishing the result.  When queries are slow, there are
        # synchronization schemes with more overlap and more frequent
        # queries.  However, they tend to have longer query-to-publish
        # latency, especially when queries are fast.  In a perfect
        # world, we would predict how long a query will take and
        # start it at just the right time.  (Or we can use a
        # synchronization mechanism that works with epoll(), or
        # we can lock around the global data structures so the thread
        # can update them directly.  There are many roads not taken.)
        #
        # The thread does not exit except when requested by the main
        # thread, by calling stop().  The main thread must call stop()
        # before it exits.  stop() can take some time to complete
        # because the query thread may be blocked in an http request.
        # urllib2.urlopen() has timeouts so should never hang.
        # The query thread has no other blocking operations.
        #
        # All functions that run in the query thread have the
        # thread_ prefix.
        #
        self.thread = None
        # these variables are protected by thread_cv
        self.thread_cv = threading.Condition()
        self.thread_quit = False
        self.thread_result = None

    def start(self):
	assert self.thread is None
	self.thread = threading.Thread(target = self.thread_main, args = (),
                                       name = self.name)
        self.thread.start()
        return True

    def stop(self):
        if self.thread is None:
            return
        with self.thread_cv:
            self.thread_quit = True
            self.thread_cv.notify()
        daemon.info("waiting for thread %s to exit" % self.thread.name)
        self.thread.join()
        self.thread = None

    def thread_main(self):
        '''
        Thread main function
        '''
        #
        # This is what we do:
        #   query the BMC (trying two different URLs)
        #   park the result or error message in a shared variable
        #   return if requested
        #   repeat
        #
        while True:
            if self.thread_wait():
                break

            data, error = self.thread_query_url(self.url)
            if data or error:
                self.thread_set_result(data, False, error)
                continue

            data, error = self.thread_query_url(self.urlold)
            if not data and not error:
                error = "BMC query failed"
            self.thread_set_result(data, True, error)

    def thread_wait(self):
        '''
        Wait for the main thread.  Return true iff main thread
        wants us to quit.
        '''
        #
        # The protocol is that self.thread_result acts like
        # a one-entry queue protected by self.thread_cv, and we
        # wait for the main thread to empty it before we can
        # fill it again.
        # The exit request is passed in self.thread_quit.
        #
        with self.thread_cv:
            while True:
                if self.thread_quit:
                    return True
                if self.thread_result is None:
                    return False
                self.thread_cv.wait()

    def thread_query_url(self, url):
        '''
        Query a URL and return
            (data, None) on success
            (None, error) on hard error (server not available,
                HTTP error other than 404, unexpected exception)
            (None, None) on soft error (URL not found, or the
                response is not application/json)
        Never raises.
        '''

        #
        # One rule is we don't want the thread to die, so every
        # exception is turned into an error string here, where the
        # message can be specific about what failed.
        #

        try:
            u = urllib2.urlopen(url, timeout = 60)
        except urllib2.HTTPError as e:
            if e.code == 404:
                return (None, None)
            # An exception raised inside an except clause is not
            # handled by the other except clauses of the same try
            # statement, so re-raising here would escape and kill
            # the thread.  Report it directly instead.
            return (None, "BMC unreachable: %s" % str(e))
        except IOError as e:
            # There are a number of exceptions here that signal
            # normal errors (socket.timeout, urllib2.URLError,
            # etc.), but they are all subclasses of IOError.
            return (None, "BMC unreachable: %s" % str(e))
        except Exception as e:
            # We don't want to ever die in the thread, so really
            # catch everything.  urllib2 can and does throw odd
            # exceptions (like httplib.BadStatusLine when
            # the server drops the connection).
            return (None,
                    "Unexpected error in urlopen(): %s: %s" %
                        (repr(e), str(e)))

        try:
            if u.info().gettype() != "application/json":
                return (None, None)
            return (u.read(), None)
        except Exception as e:
            # Again, while these two functions are not known to raise
            # exceptions, catch any and all so the thread doesn't die.
            return (None,
                    "Unexpected error in gettype() or read(): %s: %s" %
                        (repr(e), str(e)))
        finally:
            u.close()

    def thread_set_result(self, result, oldurl, error):
        '''
        Store a query result or an error message in self.thread_result.
        '''
        with self.thread_cv:
            assert self.thread_result is None
            self.thread_result = (result, oldurl, error)

    def query(self):
        '''
        Query BMC sensors and populate hwmon.

        Never blocks on the BMC: it only picks up whatever the query
        thread has parked in self.thread_result.  A query error or a
        malformed response is logged and the tick is skipped.
        '''
        #
        # If there isn't a result (yet), just return.  This function
        # is in the main event loop and shouldn't block, or in other
        # words, what's the point of using a thread if we're going
        # to wait for it?
        #
        with self.thread_cv:
            result = self.thread_result
            if result is not None:
                # tell the thread it can return the next result
                self.thread_result = None
                self.thread_cv.notify()
        if result is None:
            if daemon.debugging(1):
                daemon.info("query %s waiting" % self.thread.name)
            # prevent our sensors from going stale and being deleted
            sensors.refresh(self.name)
            return

        data, oldurl, error = result
        if error:
            daemon.err("%s" % error)
            return
        if daemon.debugging(1):
            daemon.info("query %s got %d bytes" %
                        (self.thread.name, len(data)))

        # The response comes off the network; don't let a truncated
        # or unexpected document take down the main loop.
        try:
            info = json.loads(data)["Information"]
        except (ValueError, KeyError, TypeError) as e:
            daemon.err("malformed BMC response: %s: %s" % (repr(e), str(e)))
            return

        if oldurl:
            self._parse_sensors_old(info)
        else:
            self._parse_sensors(info)

    def _parse_sensors_old(self, list):
        '''
        Load the response of the old sensors URL into the sensor
        database.
        '''
        #
        # The input is a list of entries that look like this:
        # {
        #    "name": <device-name>
        #    "Adapter": <adapter>
        #    <label>: <value> <unit>
        #    ...
        # }
        #
        # minipack platforms are parsed by key name instead of by unit
        by_key = 'minipack' in platform.name

        for entry in list:
            device_name = str(entry["name"])  # squash unicode with str()
            adapter = str(entry["Adapter"])
            s = sensors.get_sensor(self.name, device_name, True, True)

            # sort keys to make label-to-attribute mapping stable
            for k in sorted(entry):
                if k == "name" or k == "Adapter":
                    continue
                v = entry[k]
                label = str(k)   # squash unicode via str()
                if by_key:
                    stype, value = self._parse_old_value_mp(k, v)
                else:
                    stype, value = self._parse_old_value(v)
                if stype is None:
                    daemon.err("name: '%s'  adapter: '%s'" % (device_name, adapter))
                    daemon.err(str(list))
                    continue
                index = s.get_index(label, stype)
                prefix = HWMon.attr_name_prefixes[stype] + str(index)
                g = s.get_group(label, prefix, stype)
                g.set(prefix + "_label", HWMon.LABEL, label)
                g.set(prefix + "_input", stype, value)

            if s.size() == 0:
                # we errored out of all the groups
                sensors.del_sensor(self.name, device_name)
                continue
            s.update(True)

    # Patterns for the "<number> <unit>" value strings handled by
    # _parse_old_value().  Group 1 is always the number.
    # Temperature: a number, at least one more character, then "C".
    _temp_re = re.compile("^\s*((-|\+|)\d+(\.\d*|)).+C\s*$")
    # Fan speed: unsigned integer followed by " RPM".
    _fan_re = re.compile("^\s*(\d+) RPM$")
    # Voltage and current: signed decimal followed by " V" or " A".
    _voltage_re = re.compile("^\s*((-|\+|)\d+(\.\d*|)) V\s*$")
    _current_re = re.compile("^\s*((-|\+|)\d+(\.\d*|)) A\s*$")
    # Power: signed decimal; group 2 is the unit, "mW" or "W".
    _power_re = re.compile("^\s*((?:-|\+|)\d+(?:\.\d*|)) (mW|W)\s*$")

    def _parse_old_value(self, valstring):
        '''
        Turn a "<number> <unit>" string into (type, value), where the
        unit decides the HWMon type.  Returns (None, None) if the
        string matches none of the known units; that is logged
        unless the string is exactly "N/A".
        '''
        # units whose number is used as is, tried in this order
        plain = (
            (self._temp_re, HWMon.TEMP, float),
            (self._fan_re, HWMon.FAN, int),
            (self._voltage_re, HWMon.VOLTAGE, float),
            (self._current_re, HWMon.CURRENT, float),
        )
        for pattern, kind, convert in plain:
            found = pattern.match(valstring)
            if found:
                return (kind, convert(found.group(1)))

        found = self._power_re.match(valstring)
        if found:
            # sensors -u returns power in watts, which is fine
            # except we lose some precision
            watts = float(found.group(1))
            if found.group(2) == "mW":
                watts /= 1000
            return (HWMon.POWER, watts)

        if valstring != "N/A":
            daemon.err("unparsible sensor value \"%s\"" % valstring)
        return (None, None)

    # Patterns used by _parse_old_value_mp(), which is selected when
    # the platform name contains 'minipack'.  They are matched against
    # the sensor key, not the value: one or more word characters, a
    # keyword, then optional trailing digits (the VCC pattern also
    # requires word characters after the keyword).
    _temp_mp_re = re.compile("^\s*(\w+)TEMP\d*$")
    _fan_mp_re = re.compile("^\s*(\w+)SPEED\d*$")
    _voltage_mp_re = re.compile("^\s*(\w+)VOLT\d*$")
    _current_mp_re = re.compile("^\s*(\w+)CURR\d*$")
    _power_mp_re = re.compile("^\s*(\w+)POWER\d*$")
    _vmon_mp_re = re.compile("^\s*(\w+)VMON\d*$")
    _airflow_mp_re = re.compile("^\s*(\w+)AIRFLOW\d*$")
    _vcc_mp_re = re.compile("^\s*(\w+)VCC\w+\d*$")

    def _parse_old_value_mp(self, key, valstring):
        '''
        Turn a key/value pair into (type, value).  The HWMon type is
        chosen from the key name and the value is converted to float.

        Returns (None, None) if the key matches no known pattern or
        the value is not a number; that is logged unless the value
        is exactly "N/A".
        '''
        # tried in this order; the first pattern matching the key wins
        key_types = (
            (self._temp_mp_re, HWMon.TEMP),
            (self._fan_mp_re, HWMon.FAN),
            (self._airflow_mp_re, HWMon.FAN),
            (self._vcc_mp_re, HWMon.POWER),
            (self._voltage_mp_re, HWMon.VOLTAGE),
            (self._vmon_mp_re, HWMon.VOLTAGE),
            (self._current_mp_re, HWMon.CURRENT),
            (self._power_mp_re, HWMon.POWER),
        )
        for pattern, stype in key_types:
            if not pattern.match(key):
                continue
            try:
                return (stype, float(valstring))
            except (TypeError, ValueError):
                # A recognized key can still carry "N/A" or other
                # non-numeric text; treat it as unparsable instead of
                # letting the exception escape into the main loop.
                break

        if valstring != "N/A":
            daemon.err("unparsible sensor value \"%s\"" % valstring)
        return (None, None)

    def _parse_sensors(self, list):
        '''
        Load the response of the sensors-full URL into the sensor
        database.  Bad groups and bad attributes are logged and
        skipped; a device left with no groups is deleted.
        '''
        #
        # The input is a list of entries that look like this:
        # {
        #    "name": <device-name>
        #    "adapter": <adapter>
        #    <label>: {
        #       <prefix>_<suffix>: <value>
        #       ...
        #    }
        #    ...
        # }
        #
        for entry in list:
            device_name = str(entry["name"])  # squash unicode with str()
            adapter = str(entry["adapter"])
            prefixes = {}  # duplicate detection
            s = sensors.get_sensor(self.name, device_name, True)

            for k, v in entry.iteritems():
                if k == "name" or k == "adapter":
                    continue
                label = str(k)   # squash unicode via str()

                if not v:
                    daemon.err("empty sensor group \"%s\" in \"%s\"" %
                               (label, device_name))
                    continue

                # the first attribute name decides the group's prefix
                # and type
                aname, aprefix, atype = self._parse_attr_name(next(iter(v)))
                if aprefix is None:
                    continue
                if aprefix in prefixes:
                    daemon.err("duplicate attribute name prefix "
                               "\"%s\" in group \"%s\" "
                               "sensor \"%s\"" %
                               (aprefix, label, device_name))
                    continue
                prefixes[aprefix] = True
                g = s.get_group(label, aprefix, atype)

                for aname, avalue in v.iteritems():
                    aname, aprefix, atype = self._parse_attr_name(aname)
                    if aprefix is None:
                        continue
                    # verify prefix consistency
                    if aprefix != g.prefix:
                        daemon.err("sensor attribute name \"%s\" doesn't "
                                   "match group prefix \"%s\" in \"%s\"" %
                                   (aname, g.prefix, label))
                        continue
                    # one bad reading must not abort the whole query
                    try:
                        fvalue = float(avalue)
                    except (TypeError, ValueError):
                        daemon.err("unparsable value \"%s\" for attribute "
                                   "\"%s\" in \"%s\"" %
                                   (avalue, aname, label))
                        continue
                    g.set(aname, atype, fvalue)

                if len(g.entries) == 0:
                    # we errored out of all the items
                    s.del_group(label)
                    continue
                # add label attribute
                g.set(g.prefix + "_label", HWMon.LABEL, label)

            if s.size() == 0:
                # we errored out of all the groups
                sensors.del_sensor(self.name, device_name)
                continue
            s.update(True)

    # Attribute name of the form <letters><digits>_<suffix>.
    # Group 1 is the prefix (<letters><digits>), group 2 the letters,
    # which _parse_attr_name() looks up as the sensor type.
    _name_re = re.compile("^(([a-z]+)\d+)_\S+$")

    def _parse_attr_name(self, aname):
        '''
        Split an attribute name into (name, prefix, type), where type
        is the index of the name's leading letters in
        HWMon.attr_name_prefixes.  If the name cannot be parsed or
        its type is unknown, log it and return (name, None, None).
        '''
        aname = str(aname) # squash unicode
        found = self._name_re.match(aname)
        if found is None:
            daemon.err("unparsable attribute name \"%s\"" % aname)
            return aname, None, None

        prefix, kind = found.group(1), found.group(2)
        # convert string type to internal type
        known = HWMon.attr_name_prefixes
        if kind not in known:
            daemon.err("unrecognized sensor type \"%s\"" % aname)
            return aname, None, None
        return aname, prefix, known.index(kind)

class BMC_IpmiBMC(BMC):

    def query(self):
        '''
        Run 'ipmitool sensor list' and load the readings into the
        sensor database.

        The command is tried up to three times, half a second apart.
        Raises RuntimeError if all three attempts fail.
        '''
        cmd = ['/usr/bin/ipmitool', 'sensor', 'list']
        for retries in range(0, 3):
            try:
                res = subprocess.check_output(cmd)
                break
            except subprocess.CalledProcessError as e:
                daemon.err("Command {} failed with return code {}".format(e.cmd, e.returncode))
                # 3 failures, give up.  Raise instead of calling
                # exit() so main() can log it and shut down through
                # its normal path.
                if retries == 2:
                    raise RuntimeError("ipmitool failed 3 times, giving up")
                time.sleep(0.5)

        # the 'ipmitool sensor list' command gives us the list of sensors and their info. eg:
        #   Temp_Ambient_0   | 32.000     | degrees C  | ok    | na        | na        | na        | 52.000    | 54.000    | 57.000
        #   Fan_SYS_0        | 1700.000   | RPM        | ok    | na        | 500.000   | 1000.000  | na        | na        | na
        #   PSU2_VOLTAGE_OUT | 12.180     | Volts      | ok    | na        | 10.800    | 11.400    | 12.600    | 13.560    | na
        #
        # We just grab the first two columns: name and current reading
        sensor_list = set()
        for line in res.split('\n'):
            columns = line.split('|')
            if len(columns) < 10:
                continue
            label = columns[0].strip()
            if label in self.ignores:
                continue
            label = self.renames.get(label, label)
            value = columns[1].strip()
            device_name = self._get_sensor_group(label)
            if not device_name:
                continue
            # The type is used as an index into attr_name_prefixes
            # below, so 0 can be a valid type; only None means
            # "not recognized".
            stype = self._get_sensor_type(label)
            if stype is None:
                continue
            sensor = sensors.get_sensor(self.name, device_name, True, True)
            sensor_list.add(sensor)
            index = sensor.get_index(label, stype)
            prefix = '%s%u' % (HWMon.attr_name_prefixes[stype], index)
            g = sensor.get_group(label, prefix, stype)
            try:
                ival = float(value)
            except ValueError:
                # non-numeric reading, e.g. "na"
                ival = 'N/A'
            g.set(prefix + '_label', HWMon.LABEL, label)
            g.set(prefix + '_input', stype, ival)
        for sensor in sensor_list:
            sensor.update(True)

    def _get_sensor_group(self, sensor_name):
        name = sensor_name.lower()
        # 'psu' should always come after any psu* strings in
        # the group_id list below to prevent a name like psu1
        # falsely matching with 'psu'.
        for group_id in ['psu1', 'psu2', 'psul', 'psur', 'psu', 'temp', 'fan']:
            if name.find(group_id) >= 0:
                return group_id

    def _get_sensor_type(self, sensor_name):
        '''
        Return the HWMon type of the first keyword found anywhere in
        the lowercased sensor name, or None if there is none.
        '''
        lowered = sensor_name.lower()
        keywords = (
            ('temp', HWMon.TEMP),
            ('fan', HWMon.FAN),
            ('rpm', HWMon.FAN),
            ('volt', HWMon.VOLTAGE),
            ('current', HWMon.CURRENT),
            ('power', HWMon.POWER),
        )
        for keyword, sensor_type in keywords:
            if keyword in lowered:
                return sensor_type
        return None



class Board:
    '''
    Handle on-board sensors.
    '''

    def __init__(self, name):
        '''
        name is the source name that query() passes to the sensor
        database when it looks up or deletes a sensor.
        '''
        self.name = name

    # Matches a section whose first line ends in "-virtual-0";
    # query() skips those sections.
    _vhwmon_re = re.compile(".+-virtual-0$", re.MULTILINE)
    # A group label line: starts in column 0 and ends with ":".
    # Group 1 is the label without the colon.
    _labelline_re = re.compile("^(\S.*):$")

    def query(self):
        '''
        Query on-board sensors and populate hwmon.

        Runs "/usr/bin/sensors -uA", splits the output into
        blank-line-separated sections (one per device) and parses
        each section into groups of attributes.  Parse problems are
        logged and the offending line or group is skipped.  A device
        left with no groups is deleted.
        '''
        # would use close_fds = True but it's very slow
        # because os.sysconf("SC_OPEN_MAX") is 64k
        raw = subprocess.check_output(["/usr/bin/sensors", "-uA"])
        for section in raw.split("\n\n"):
            #
            # A section of sensors -uA output looks something like this:
            #
            #   coretemp-isa-0000
            #   Core 0:
            #     temp2_input: 40.000
            #     temp2_max: 110.000
            #     temp2_crit: 110.000
            #     temp2_crit_alarm: 0.000
            #   Core 1:
            #     temp3_input: 39.000
            #     temp3_max: 110.000
            #     temp3_crit: 110.000
            #     temp3_crit_alarm: 0.000
            #
            # First line is the device name, followed by groups
            # of attributes.
            #
            # The first line of each group is the label for
            # the following set of attributes.  Each attribute
            # will become a sysfs attribute.  The label will also
            # be stored in sysfs as the <prefix>_label attribute.
            # For example, "Core 0" will be the value of "temp2_label".
            #

            if section == "":
                continue
            if Board._vhwmon_re.match(section):
                # ignore virtual nodes
                continue

            lines = section.split("\n")

            # first line is the device name
            device_name = lines[0]
            labels = {}    # for duplicate detection
            prefixes = {}  # for duplicate detection
            s = sensors.get_sensor(self.name, device_name, True)

            # lineno is the cursor into lines; it always points at the
            # next line that has not been consumed yet
            lineno = 1
            while lineno < len(lines):
                # first line should be the label
                line = lines[lineno]
                lineno += 1
                m = Board._labelline_re.match(line)
                if not m:
                    daemon.err("unparsable sensor line \"%s\"" % line)
                    continue
                label = m.group(1)

                # Validate the label and the first attribute line.
                # Every perr() call raises ParseError, which is caught
                # below to drop the whole group.
                try:
                    if label in labels:
                        self.perr("duplicate group label \"%s\" in \"%s\"" %
                                  (label, device_name))
                    labels[label] = True

                    # process first attribute line to set up the group

                    # if there is a next line, parse it
                    aname = None
                    if lineno < len(lines):
                        aname, aprefix, atype, avalue = \
                            self._parse_attr_line(lines[lineno])
                    if aname is None:
                        # didn't match, assume end of group
                        self.perr("empty sensor group \"%s\" in \"%s\"" %
                                  (label, device_name))
                    lineno += 1

                    # able to parse line, but is it good?

                    if atype is None:
                        # type is bad, error already reported
                        self.perr("")

                    if aprefix in prefixes:
                        self.perr("duplicate attribute name prefix \"%s\" "
                                  "in group \"%s\" sensor \"%s\"" %
                                  (aprefix, label, device_name))

                    # name must not clash with label
                    lname = aprefix + "_label"
                    if aname == lname:
                        self.perr("duplicate sensor label \"%s\" value \"%s\" "
                                  "in group \"%s\" sensor \"%s\"" %
                                  (aname, avalue, label, device_name))
                except self.ParseError as e:
                    # an empty message means the problem was already
                    # logged where it was found
                    msg = str(e)
                    if msg != "":
                        daemon.err("%s" % msg)
                    # skip to next group
                    while lineno < len(lines):
                        m = Board._labelline_re.match(lines[lineno])
                        if m:
                            break
                        lineno += 1
                    continue

                prefixes[aprefix] = True

                # get group based on first attribute
                g = s.get_group(label, aprefix, atype)
                g.set(lname, HWMon.LABEL, label)
                g.set(aname, atype, avalue)

                # for duplicate detection
                attr_names = {lname: True, aname: True}

                # After the first attribute, parsing the remaining ones
                # is fairly simple.
                while lineno < len(lines):
                    line = lines[lineno]
                    aname, aprefix, atype, avalue = self._parse_attr_line(line)
                    if aname is None:
                        # didn't match assume end of group
                        # (the line is left for the outer loop, which
                        # treats it as the next label line)
                        break
                    lineno += 1

                    # verify prefix consistency
                    if aprefix != g.prefix:
                        daemon.err("sensor attribute name \"%s\" doesn't "
                                   "match group prefix \"%s\" in \"%s\"" %
                                   (aname, g.prefix, label))
                        continue
                    # check for duplicate attribute name
                    if aname in attr_names:
                        daemon.err("duplicate attribute name \"%s\"" % aname)
                        continue
                    attr_names[aname] = True
                    # type should be good because prefix is good
                    assert(atype is not None)

                    g.set(aname, atype, avalue)

            if s.size() == 0:
                # we errored out of all the groups
                sensors.del_sensor(self.name, device_name)
                continue
            s.update(True)

    # An indented attribute line, " <letters><digits>_<suffix>: <value>".
    # Group 1 is the full attribute name, group 2 the prefix
    # (<letters><digits>), group 3 the letters, group 4 the value text.
    _attrline_re = re.compile("^\s+((([a-z]+)\d+)_[^:]+):\s*(.+)$")

    def _parse_attr_line(self, line):
        m = Board._attrline_re.match(line)
        if not m:
            return None, None, None, None

        #
        # Matched a line of the form " xxxnn_yyy: zzz"
        # The parts are
        #   aname    xxxnn_yyy
        #   aprefix  xxxnn_
        #   atype    xxx
        #   avalue   zzz
        #
        aname = m.group(1)
        aprefix = m.group(2)
        atype = m.group(3)
        avalue = float(m.group(4))

        # convert string type to internal type
        if atype not in HWMon.attr_name_prefixes:
            daemon.err("unrecognized sensor type \"%s\" in \"%s\"" %
                       (atype, line))
            return aname, aprefix, None, avalue
        atype = HWMon.attr_name_prefixes.index(atype)
        return aname, aprefix, atype, avalue

    class ParseError(RuntimeError):
        # Raised by perr() and caught in query() to abandon the sensor
        # group being parsed.  query() logs the message unless it is
        # the empty string.
        pass

    def perr(self, msg):
        raise self.ParseError(msg)


class Sensors:
    """
    Registry of all known SensorDevice objects, keyed by (source, name).

    Creates and deletes devices, ages out the ones that stop being
    updated, pushes them to hwmon and, when enabled, regenerates the
    dynamic platform sensors file.
    """

    def __init__(self, support_platform_config):
        self.support_platform_config = support_platform_config
        self.modified = False
        self.sensors = {}
        # unlink it even (or especially) if we're not using it
        try:
            os.unlink(cumulus.sensors.dynamic_sensors_file)
        except OSError:
            pass

    def _discard(self, key):
        """
        Remove the device stored under key and free it.  Marks the
        registry modified if the device had already been pushed.
        Raises KeyError if there is no such device.
        """
        s = self.sensors.pop(key)
        if s.pushed:
            self.modified = True
        s.free()

    def get_sensor(self, source, name, local, oldurl = False):
        """
        Return the device for (source, name), creating it if needed.
        The device's oldurl setting is always set to the given value.
        """
        key = (source, name)
        s = self.sensors.get(key)
        if s is None:
            s = SensorDevice(source, name, local)
            self.sensors[key] = s
        s.set_oldurl(oldurl)
        return s

    def del_sensor(self, source, name):
        """Delete and free the device for (source, name)."""
        self._discard((source, name))

    def push(self):
        """
        Age and prune the devices, push the survivors to hwmon, and
        rewrite the platform config if anything was added or deleted.
        """
        doomed = []
        for key, s in self.sensors.iteritems():
            if s.size() == 0:
                # delete empties left after unupdated groups got deleted
                doomed.append(key)
            elif s.updated:
                s.updated = False
                s.age = 0
            else:
                # age out unupdated entries
                s.age += 1
                if daemon.debugging(8):
                    daemon.info("age  sensor %s %d" % (s.fullname, s.age))
                if s.age > 2:
                    daemon.info("deleting stale sensor %s" % (s.fullname))
                    doomed.append(key)

        for key in doomed:
            self._discard(key)

        for s in self.sensors.itervalues():
            if s.modified:
                self.modified = True
            s.push()

        if not self.modified:
            return
        self.write_platform_config()
        self.modified = False

    def get_data(self, local_only):
        """
        Return a list of (source, name, device data) tuples, limited
        to local devices when local_only is true.
        """
        return [(source, name, s.get_data())
                for (source, name), s in self.sensors.iteritems()
                if not local_only or s.local]

    def bulk_update(self, data, local):
        """Apply data in the format produced by get_data()."""
        for source, name, sdata in data:
            self.get_sensor(source, name, local).bulk_update(sdata, local)

    def refresh(self, from_source = None):
        """
        Keep devices from aging for one cycle: all of them, or only
        the ones from from_source when that is given.
        """
        # XXX this can be slow
        for (source, name), s in self.sensors.iteritems():
            if from_source is None or source == from_source:
                s.refresh()

    def write_platform_config(self):
        """
        Write the dynamic sensors file (via a temporary file and
        rename) and restart smond.service.  Does nothing unless
        platform config support was requested at construction.
        """
        # XXX This is currently an unused feature.
        # If we ever delete it, there is a lot of support code that
        # will also become unnecessary.
        if not self.support_platform_config:
            return
        if daemon.debugging(1):
            daemon.info("writing platform config file")

        filename = cumulus.sensors.dynamic_sensors_file
        tempname = filename + ".tmp"
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname, 0o755)

        with open(tempname, "w") as out:
            out.write("import cumulus.sensors\n\n")
            n = 1
            # try to list sensors in some consistent order
            for key in sorted(self.sensors.keys()):
                n = self.sensors[key].write_platform_config(out, n)
            out.write("sensors = (\n")
            for i in range(1, n):
                out.write("    DynamicTempV%03d(),\n" % i)
            out.write(")\n")
        os.rename(tempname, filename)

        restart_cmd = ["systemctl", "--no-block", "restart", "smond.service"]
        subprocess.call(restart_cmd)


class SensorDevice:
    """
    One sensor device from one source: a set of SensorGroups keyed by
    label, plus the HWMon object used to publish them.
    """

    def __init__(self, source, name, local):
        self.source = source
        self.name = name
        self.fullname = source + ":" + name
        self.local = local
        self.oldurl = False
        self.groups = {}
        self.hwmon = None
        self.age = 0
        self.updated = False
        self.pushed = False
        # modified means entries added or deleted, not values changed
        self.modified = False
        if daemon.debugging(9):
            daemon.info("init sensor %s" % (self.fullname))

    def free(self):
        """Release the hwmon object, if one was ever created."""
        if daemon.debugging(9):
            daemon.info("free sensor %s" % (self.fullname))
        if self.hwmon:
            self.hwmon.free()

    def get_group(self, label, prefix, type):
        """
        Return the group with this label, creating it (and marking
        the device modified) if it does not exist yet.
        """
        g = self.groups.get(label)
        if not g:
            if daemon.debugging(9):
                daemon.info("add  sensor %s group \"%s\" %s %d" %
                            (self.fullname, label, prefix, type))
            g = SensorGroup(label, prefix, type)
            self.groups[label] = g
            self.modified = True
        return g

    def del_group(self, label):
        """Delete the group with this label and mark the device modified."""
        if daemon.debugging(9):
            daemon.info("del  sensor %s group \"%s\"" %
                        (self.fullname, label))
        del self.groups[label]
        self.modified = True

    def size(self):
        """Return the total number of entries over all groups."""
        return sum(len(g.entries) for g in self.groups.itervalues())

    def update(self, local):
        """
        Finish an update cycle: update every group, pick up their
        modified flags, drop groups that ended up empty, and mark the
        device as updated.
        """
        self.local = local

        empty = []
        for label, g in self.groups.iteritems():
            g.update()
            if g.modified:
                g.modified = False
                self.modified = True
            if not g.entries:
                empty.append(label)
        if empty:
            if daemon.debugging(9):
                daemon.info("del  sensor %s empty groups %s" %
                            (self.fullname, empty))
            for label in empty:
                del self.groups[label]
            self.modified = True

        if daemon.debugging(10):
            daemon.info("update sensor %s" % self.fullname)
            for label, g in self.groups.iteritems():
                daemon.info("  %s:" % label)
                for aname, (_, _atype, avalue) in g.entries.iteritems():
                    daemon.info("    %s: %s" % (aname, avalue))
        self.updated = True

    def refresh(self):
        """
        Stop the entry from aging, for this one cycle.

        "updated" is not set, because that would reset the age to 0.
        Instead the age is decreased (yes, it goes negative) so when
        it gets bumped later it is back to the same age.
        """
        self.age = self.age - 1

    def set_oldurl(self, oldurl):
        """
        Record the oldurl setting.  When it changes to a true value,
        the label-to-index map and per-type counters are reset.
        """
        if oldurl is self.oldurl:
            return
        self.oldurl = oldurl
        if not oldurl:
            return
        self.label_to_index = {}
        first_index = dict.fromkeys(
            (HWMon.TEMP, HWMon.FAN, HWMon.CURRENT, HWMon.POWER), 1)
        # for some reason, voltage sensor names start at 0
        first_index[HWMon.VOLTAGE] = 0
        self.attr_indices = first_index

    def get_index(self, label, type):
        """
        Return the index assigned to label, assigning the next free
        index for the given type the first time a label is seen.
        Only valid while oldurl is set.
        """
        assert self.oldurl
        if label not in self.label_to_index:
            self.label_to_index[label] = self.attr_indices[type]
            self.attr_indices[type] += 1
        return self.label_to_index[label]

    def push(self):
        """Create or update the hwmon object and push it."""
        if self.hwmon:
            self.hwmon.update(self, self.modified)
        else:
            self.hwmon = HWMon(self)
        self.hwmon.push()
        self.modified = False
        self.pushed = True

    def get_data(self):
        """Return a list of (label, prefix, type, group data) tuples."""
        return [(label, g.prefix, g.type, g.get_data())
                for label, g in self.groups.iteritems()]

    def bulk_update(self, data, local):
        """Apply data in the format produced by get_data(), then update()."""
        for label, prefix, type, gdata in data:
            self.get_group(label, prefix, type).bulk_update(gdata)
        self.update(local)

    def write_platform_config(self, file, n):
        """
        Have each group, in label order, write its platform config
        to file.  n is the next free class number; the updated value
        is returned.
        """
        for label in sorted(self.groups.keys()):
            n = self.groups[label].write_platform_config(file, label, self, n)
        return n

class SensorGroup:
    """
    The attributes that share one label.  Each entry maps an attribute
    name to a list [touched, type, value].
    """

    def __init__(self, label, prefix, type):
        self.label = label
        self.prefix = prefix
        self.type = type
        # modified means entries added or deleted, not values changed
        self.modified = False
        self.entries = {}

    def set(self, name, type, value):
        """
        Store value under name and flag the entry as touched.  A new
        entry marks the group modified; an existing one must keep its
        type.
        """
        e = self.entries.get(name)
        if e:
            assert e[1] == type
            e[0], e[2] = True, value
            return
        if daemon.debugging(9):
            daemon.info("add  group \"%s\" %s" % (self.label, name))
        self.entries[name] = [True, type, value]
        self.modified = True

    def update(self):
        """
        Delete the entries that were not set() since the previous
        update() and clear the touched flag on the rest.
        """
        stale = []
        for name, e in self.entries.iteritems():
            if not e[0]:
                stale.append(name)
            else:
                e[0] = False
        if not stale:
            return
        if daemon.debugging(9):
            daemon.info("del  group \"%s\" %s" % (self.label, stale))
        for name in stale:
            del self.entries[name]
        self.modified = True

    def get_data(self):
        """Return a list of (name, type, value) tuples, one per entry."""
        return [(name, atype, avalue)
                for name, (_, atype, avalue) in self.entries.iteritems()]

    def bulk_update(self, data):
        """Apply data in the format produced by get_data()."""
        for name, type, value in data:
            self.set(name, type, value)

    def write_platform_config(self, file, label, s, n):
        """
        For a temperature group, write a DynamicTempV<n> class
        definition to file and return n + 1.  Groups of any other
        type write nothing and return n unchanged.
        """
        if self.type != HWMon.TEMP:
            return n
        emit = file.write
        emit("class DynamicTempV%03d(cumulus.sensors.Temp_Unit):\n" % n)
        emit("    name = 'TempV%03d'\n" % n)
        emit("    description = %s\n" % repr(s.source + ": " + label))
        emit("    driver_path = %s\n" %
             repr("/sys/class/hwmon/" + s.hwmon.hwmon_name))
        emit("    driver_hwmon = [%s]\n" % repr(self.prefix))
        emit("    hwmon_search = False\n")
        emit("    dynamic_limits = True\n")
        emit("    no_shutdown = True\n")
        emit("\n")
        return n + 1


class HWMon:
    """
    Manage hwmon nodes

    Each instance is an hwmon device (a sysfs hwmon directory),
    created using the vhwmon driver.  Each device has a number
    of sensor attributes, each of which has a value and a type
    (voltage, temperature, etc.).  The type determines the name
    of the attribute (for example, in<n>_input for voltage)
    and the format of the value.

    The sysfs directories and attributes are created at instance
    creation time.  Currently, they cannot be changed later.
    Only the values can be updated.
    """

    # attribute types
    LABEL = 0
    TEMP = 1
    FAN = 2
    VOLTAGE = 3
    CURRENT = 4
    POWER = 5

    # attribute name prefix for each type; the list index is the type
    attr_name_prefixes = ["label", "temp", "fan", "in", "curr", "power",]

    def __init__(self, s):
        """
        Build the attribute tables for SensorDevice s.
        Nothing is allocated in vhwmon until push().
        """
        self.name = s.fullname
        self.id = None
        # filled in by push() when vhwmon reports a name; defined here
        # so reading it before the first allocation gives None
        self.hwmon_name = None
        self.realloc = False
        self.init(s)

    def init(self, s):
        """(Re)build the name, type and value tables from s."""
        if daemon.debugging(9):
            daemon.info("init hwmon %s" % (self.name))
        self.modified = True
        self.indices = {}
        self.size = s.size()
        self.attr_types = [None] * self.size
        self.attr_names = [None] * self.size
        self.attr_values = [None] * self.size
        i = 0
        for g in s.groups.itervalues():
            for aname, (x, atype, avalue) in g.entries.iteritems():
                # there should be no duplicates at this point
                assert aname not in self.indices
                self.indices[aname] = i
                # format using the full name, before any truncation
                avalue = self._format_value(aname, atype, avalue)
                aname = self._check_attr_name(aname)
                avalue = self._check_attr_value(avalue)
                self.attr_names[i] = aname
                self.attr_types[i] = atype
                self.attr_values[i] = avalue
                i += 1

    def free(self):
        """Free the vhwmon device, if one is allocated."""
        if self.id is not None:
            if daemon.debugging(9):
                daemon.info("free hwmon %s %d" % (self.name, self.id))
            vhwmon.free(self.id)
            self.id = None

    def update(self, s, reinit):
        """
        Refresh the tables from SensorDevice s.

        With reinit true (entries were added or deleted) the tables
        are rebuilt and the device is reallocated at the next push().
        Otherwise only the values are refreshed, and the instance is
        marked modified if any formatted value changed.
        """
        if reinit:
            self.init(s)
            self.realloc = True
            return

        assert self.size == s.size()
        for g in s.groups.itervalues():
            for aname, (x, atype, avalue) in g.entries.iteritems():
                i = self.indices[aname]
                if atype != self.attr_types[i]:
                    daemon.err("\"%s\" in hwmon %s changed from "
                               "atype %d to %d" %
                               (aname, self.name,
                                self.attr_types[i], atype))
                    # just fix it up and go on
                    self.attr_types[i] = atype
                avalue = self._format_value(self.attr_names[i],
                                            atype, avalue)
                avalue = self._check_attr_value(avalue)
                if avalue != self.attr_values[i]:
                    self.attr_values[i] = avalue
                    self.modified = True

    def push(self):
        '''
        Update vhwmon if this is a new device or if a value has changed.
        '''

        if self.realloc:
            self.free()
            self.realloc = False

        if self.size == 0:
            self.free()

        elif self.id is None:
            # "-" is special to lm-sensors, so don't use it
            vhwmon_name = self.name.replace("-", "_")
            vhwmon_name = self._check_device_name(vhwmon_name)
            # XXX compatibility can be phased out eventually
            x = vhwmon.alloc(vhwmon_name,
                             self.attr_names, self.attr_values)
            if isinstance(x, tuple):
                self.id, self.hwmon_name = x
            else:
                self.id = x
                self.hwmon_name = None
            if daemon.debugging(9):
                daemon.info("allo hwmon %s %d %s" %
                            (self.name, self.id, self.hwmon_name))
                if daemon.debugging(10):
                    daemon.info("  names %s" % (self.attr_names))
                    daemon.info("  values %s" % (self.attr_values))

        elif self.modified:
            if daemon.debugging(9):
                daemon.info("set  hwmon %s" % (self.name))
                if daemon.debugging(10):
                    daemon.info("  values %s" % (self.attr_values))
            vhwmon.set(self.id, self.attr_values)

        self.modified = False

    def _round(self, x):
        """
        Round x to the nearest integer, halves away from zero.

        int(x + 0.5) alone truncates toward zero and so is off by one
        for negative readings (-12000.0 would become -11999).
        """
        if x < 0:
            return -int(-x + 0.5)
        return int(x + 0.5)

    def _format_value(self, name, type, value):
        """
        Return value as the string to store in the attribute.

        Strings are passed through unchanged and LABEL values are
        converted with str().  Numeric values are scaled and rounded
        to an integer: FAN by 1; TEMP, VOLTAGE and CURRENT by 1000;
        POWER by 1000000, except attributes whose name ends in
        "_interval", which use 1000.  (Presumably these are the units
        the hwmon sysfs interface expects -- see the kernel hwmon
        documentation.)

        Raises RuntimeError for an unknown type.
        """
        # str is the same object as types.StringType
        if isinstance(value, str):
            return value
        if type == HWMon.LABEL:
            return str(value)
        if type == HWMon.FAN:
            scale = 1
        elif type in (HWMon.TEMP, HWMon.VOLTAGE, HWMon.CURRENT):
            scale = 1000
        elif type == HWMon.POWER:
            if name.endswith("_interval"):
                scale = 1000
            else:
                scale = 1000000
        else:
            raise RuntimeError("Unknown sensor value type %d (value \"%s\")" %
                               (type, str(value)))
        return str(self._round(value * scale))

    def _check_device_name(self, name):
        """Truncate, with a warning, a device name too long for vhwmon."""
        if len(name) >= vhwmon.DEVICE_NAME_LEN:
            daemon.warn("hwmon device name \"%s\" too long, truncated" %
                        name)
            name = name[:vhwmon.DEVICE_NAME_LEN - 1]
        return name

    def _check_attr_name(self, name):
        """Truncate, with a warning, an attribute name too long for vhwmon."""
        # XXX this can lead to duplicate attribute names
        if len(name) >= vhwmon.ATTR_NAME_LEN:
            daemon.warn("hwmon attribute name \"%s\" too long, truncated" %
                        name)
            name = name[:vhwmon.ATTR_NAME_LEN - 1]
        return name

    def _check_attr_value(self, value):
        """Truncate, silently, an attribute value too long for vhwmon."""
        if len(value) >= vhwmon.ATTR_VALUE_LEN:
            # The warning for this case is disabled:
            #   daemon.warn("hwmon attribute value \"%s\" too long, "
            #               "truncated" % value)
            value = value[:vhwmon.ATTR_VALUE_LEN - 1]
        return value


class Daemon:
    '''
    Process management for a daemon.
    An instance should be created as early as possible.
    '''

    pid_file = None
    started = False

    def __init__(self, name = None, debug_level = 0, debug_throttle = 1):
        '''
        name is the program name (defaulting to basename(argv[0])).
        debug_level determines whether the program is to run in
        the foreground and whether messages go to stdout or syslog.
        debug_throttle is for reducing the frequency of debug and
        logging activity.
        There is another chance to set debug_level and debug_throttle
        in start().
        '''
        if name is None:
            name = os.path.basename(sys.argv[0])
        self.name = name
        self.debug_level = debug_level
        self.debug_throttle = debug_throttle
        self.tick = 0

        syslog.openlog(name)
        # SIGTERM takes the same clean-up path as a normal exit
        signal.signal(signal.SIGTERM, lambda s, f: self.exit())

    def start(self, debug_level = None, debug_throttle = None):
        '''
        Start running.
        This should be called after early initialization (like argv
        parsing) and before any serious work.
        debug_level and debug_throttle can be specified here,
        if __init__() happened too early.

        Exits the process if another instance is already running.
        Raises RuntimeError if the PID file cannot be validated.
        '''
        if debug_level is not None:
            self.debug_level = debug_level
        if debug_throttle is not None:
            self.debug_throttle = debug_throttle
        self.started = True
        #
        # If not in debug mode, check for other instances of this program
        # and create pid file.
        #
        if not self.debugging(1):
            self.pid_file = "/var/run/%s.pid" % self.name
            if self.already_running():
                sys.exit()
            with open(self.pid_file, "w") as f:
                f.write("%d\n" % os.getpid())

    def exit(self, status = 0):
        '''
        Clean up and exit.
        Removes the PID file (if one is in use), logs the exit and
        raises SystemExit with the given status.
        '''
        if self.pid_file:
            try:
                os.unlink(self.pid_file)
            except OSError as e:
                # an already-missing PID file must not block the exit
                if e.errno != errno.ENOENT:
                    self.warn("unable to remove PID file %s: %s" %
                              (self.pid_file, str(e)))
        if status == 0:
            self.info("clean exit")
        else:
            self.err("exiting with status %d" % status)
        sys.exit(status)

    def already_running(self):
        '''
        Return True if the PID file names a live process whose command
        line contains our name, False if there is no such process.
        Raises RuntimeError if the PID file cannot be checked.
        '''
        try:
            if not os.path.isfile(self.pid_file):
                return False
            with open(self.pid_file, "r") as f:
                oldpid = re.findall(r"\D*(\d+).*", f.readline())[0]
            if not os.path.exists("/proc/%s" % oldpid):
                return False
            with open("/proc/%s/cmdline" % oldpid, "r") as f:
                if self.name not in f.readline():
                    return False
            self.err("%s already running as process %s" % (self.name, oldpid))
            return True
        except Exception as e:
            raise RuntimeError("Unable to validate PID file %s: %s" %
                               (self.pid_file, str(e)))

    def timer(self):
        '''
        Advance the tick counter used for throttling.
        '''
        self.tick += 1

    def debugging(self, level):
        '''
        Decide if logging or debugging at the given level should be done.
        If throttling, show message only every debug_throttle poll periods.
        A debug_throttle of 0 means no throttling.
        '''
        throttle = self.debug_throttle
        if throttle and self.tick % throttle != 0:
            return False
        return self.debug_level >= level

    #
    # Logging
    #
    # Write messages to stdout in debug mode.
    # Use syslog if not.
    # Do both if we haven't started running yet.
    #
    def info(self, message):
        self._log(message, "INFO", syslog.LOG_INFO)

    def warn(self, message):
        self._log(message, "WARN", syslog.LOG_WARNING)

    def err(self, message):
        self._log(message, "ERR ", syslog.LOG_ERR)

    def _log(self, message, tag, level):
        '''
        Emit message one line at a time, to stdout (prefixed with tag)
        and/or syslog (at the given level).
        '''
        for line in message.split('\n'):
            if self.debug_level >= 1 or not self.started:
                # a single parenthesized argument prints the same text
                # as "print tag, line"
                print("%s %s" % (tag, line))
                sys.stdout.flush()
            if self.debug_level < 1 or not self.started:
                syslog.syslog(level, line)

class Poll:
    '''
    File descriptor and timer event management.

    Guarantees:

    Timers due at the same time fire in registration order.

    Phase relationship between timers is preserved.

    Timers may fire late but the frequency does not drift over time.
    However, this means we fall behind (and possibly never
    catch up) if there isn't enough CPU time to execute
    everything within the specified timer period.  We will peg
    the CPU in that case.

    An important assumption is that the time taken to do
    all the work is small relative to the shortest timer period,
    so there's time to sleep and enough slack to catch up.

    Use integer arithmetic in the right places to avoid floating point
    imprecision.
    '''

    def __init__(self):
        # registered PollTimer objects, in registration order
        self.timers = []
        # registered PollFD objects, keyed by file descriptor
        self.fds = {}
        self.epoll = select.epoll()
        # wall-clock time at which loop() started; None until then
        self.time0 = None

    def loop(self):
        '''
        Run the event loop; never returns.

        Each pass sleeps in epoll until the earliest timer is due
        (or indefinitely if there are no timers), runs the handlers
        of the fds that became readable, then runs every timer that
        is due.  All times inside the loop are seconds relative to
        time0.
        '''
        self.time0 = time.time()
        last_time = 0.0

        while True:
            now = time.time() - self.time0

            # make sure time didn't go backwards
            if len(self.timers) == 0:
                # no timers so don't care about time warps
                pass
            # NOTE(review): this tests against 0 rather than last_time,
            # although the message and the fix-up below are both
            # expressed relative to last_time -- confirm this is intended
            elif now < 0:
                daemon.warn("time went backward (by %f to %f)" %
                            (now - last_time, now))
                # The important thing is to avoid sleeping too long.
                # Pretend no actual time has elapsed since last_time.
                self.time0 += now - last_time
                now = last_time
            last_time = now

            # find next firing time and compute sleep time
            next_time = None
            min_period = None
            for e in self.timers:
                # next_time is kept as an int; convert for comparisons
                nt = e.next_time + 0.0
                if next_time is None or nt < next_time:
                    next_time = nt
                if min_period is None or e.period < min_period:
                    min_period = e.period
            if next_time is None:
                # no timers: a negative timeout makes epoll wait forever
                sleep = -1
            elif next_time <= now:
                # already due: just poll the fds without blocking
                sleep = 0
            else:
                sleep = next_time - now

            # sleep and poll
            if daemon.debugging(2):
                daemon.info("sleep %.3f" % sleep)
            try:
                fds = self.epoll.poll(sleep)
            except IOError as e:
                # an interrupted wait is treated as "no fds ready"
                if e.errno != errno.EINTR:
                    raise
                fds = []

            # fire fds
            for fd, ev in fds:
                #daemon.info("epoll returns %d %#x" % (fd, ev))
                # NOTE(review): any event mask other than exactly EPOLLIN
                # trips this assert -- confirm that is acceptable
                assert ev == select.EPOLLIN
                assert fd in self.fds
                e = self.fds[fd]
                if daemon.debugging(2):
                    daemon.info("fire  fd %s%s" % (e.name, e.args))
                e.func(*e.args)

            now = time.time() - self.time0

            # look for time weirdness while we slept
            if next_time is None:
                # no timers so don't care about time warps
                pass
            elif now < last_time or now > next_time + min_period:
                if now < last_time:
                    # time went back
                    daemon.warn("time went backward (by %f to %f)" %
                                (now - last_time, now))
                else:
                    # time jumped forward or we've fallen massively behind
                    daemon.warn("fell behind or time jumped (by %f to %f)" %
                                (now - last_time, now))
                # In either case, assume enough time has passed
                # to fire the next timer.
                # Shifting time0 makes the relative clock read exactly
                # next_time from here on.
                self.time0 += now - next_time
                now = next_time
            last_time = now

            # fire timers
            for e in self.timers:
                # fudge: don't try to sleep less than a millisecond
                # fire timers early instead
                if e.next_time <= now + 0.001:
                    if daemon.debugging(2):
                        daemon.info("fire  %.3f %s%s" %
                                    (time.time() - self.time0,
                                     e.name, e.args))
                    e.func(*e.args)
                    # advance by whole periods from the scheduled time,
                    # not from "now", so lateness does not accumulate
                    e.next_time += e.period
                    assert type(e.next_time) is int

    def register_timer(self, name, func, args, period, next_time):
        '''
        Register func(*args) to be called every period seconds, first
        at next_time seconds after loop() starts.  period and
        next_time must be ints.  Must be called before loop().
        '''
        # time0 is set by loop(), so this rejects late registration
        assert self.time0 is None
        e = Poll.PollTimer(name, func, args, period, next_time)
        self.timers.append(e)

    def register_fd(self, name, func, args, fd):
        '''
        Register func(*args) to be called whenever fd is reported
        readable (EPOLLIN) by epoll.
        '''
        e = Poll.PollFD(name, func, args, fd)
        self.fds[fd] = e
        self.epoll.register(fd, select.EPOLLIN)

    class PollTimer:
        '''
        One registered timer: the handler to call, its period and the
        next time it is due.
        '''
        def __init__(self, name, func, args, period, next_time):
            # ints keep repeated "next_time += period" exact
            assert type(period) is int
            assert type(next_time) is int
            self.name = name
            self.func = func
            self.args = args
            self.period = period
            self.next_time = next_time

    class PollFD:
        '''
        One registered file descriptor and the handler to call for it.
        '''
        def __init__(self, name, func, args, fd):
            self.name = name
            self.func = func
            self.args = args
            self.fd = fd

if __name__ == "__main__":
    main()
    # main() finishes by calling daemon.exit(), which exits the
    # process, so control should never get here.
    assert False
