/*
 *  net/core/port - generic netlink port handler
 *  Copyright (C) 2015 Cumulus Networks
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/list.h>
#include <linux/rtnetlink.h>
#include <linux/hashtable.h>
#include <linux/rcupdate.h>
#include <linux/nsproxy.h>
#include <linux/mroute.h>
#include <linux/mroute6.h>
#include <linux/port.h>
#include <linux/if_vlan.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/genetlink.h>
#include <net/ipv6_stubs.h>

/* Hash table (2^10 buckets) of struct port_node entries keyed by ifindex. */
static DEFINE_HASHTABLE(port_cache, 10);
/* Held by every function in this file that adds, removes or updates a
 * port_cache entry; lookups that only read use rcu_read_lock() instead.
 */
static DEFINE_SPINLOCK(port_cache_lock);
/* Guards wq_list: write-locked by port_wait(), read-locked by port_reply(). */
static DEFINE_RWLOCK(port_wq_lock);
/* Lets port_vstats_fetch() detect a concurrent port_vlan_update_stats(). */
static DEFINE_SEQLOCK(vstats_seqlock);

/* Array of VLAN_N_VID per-VLAN counters allocated by port_vstats_init();
 * stays NULL when that allocation fails.
 */
static struct bridge_vlan_xstats *port_vstats;

/* Set to true by port_vlan_update_stats(); reported by port_vstats_valid(). */
static bool port_vlan_stats_valid;
/* Forward declaration; the initialised definition is near the end of the file. */
static struct genl_family port_family;

/* One cached port, stored in port_cache and looked up by ifindex.
 * Entries are created by __port_cache_init() and released through
 * call_rcu() by __port_cache_uninit().
 */
struct port_node {
	struct hlist_node hash_node;	/* linkage in port_cache */
	int ifindex;			/* lookup key */
	struct ethtool_link_ksettings link_ksettings; /* last settings stored */
	int settings_valid;		/* set to 1 once link_ksettings is written */
	struct ethtool_fecparam fecparam; /* last FEC parameters stored */
	u32 sset_count[ETH_SS_FEATURES + 1]; /* indexed by string-set id */
	u64 *stat_data;			/* most recent counters pushed in */
	u64 *stat_data_old; /* per-counter offset added to stat_data on read */
	int stat_data_count;		/* number of u64 entries in stat_data */
	u8 *stat_strings;		/* cached blob of ETH_GSTRING_LEN strings */
	struct rcu_head rcu;		/* used by call_rcu() when freeing */
};

/* Index of the one counter that port_cache_set_stats() does NOT fold into
 * the stat_data_old snapshot on restart.
 * NOTE(review): by its name this is presumably an instantaneous output
 * queue length rather than a cumulative counter -- confirm with the
 * userspace stats layout.
 */
#define ETH_STAT_HwIfOutQLen_idx 24

/* Return the port_vstats slot for @vid (masked with VLAN_VID_MASK), or
 * NULL when the port_vstats array was never allocated.
 */
static struct bridge_vlan_xstats *port_vstats_entry(u16 vid)
{
	u16 slot = vid & VLAN_VID_MASK;

	if (likely(port_vstats))
		return port_vstats + slot;

	return NULL;
}

/* Fold @stats into the stored counters for stats->vid.
 *
 * Each stored counter becomes (stored * @add) + incoming, so @add == 1
 * accumulates and @add == 0 overwrites.  Callers are responsible for any
 * locking.
 *
 * Returns 0, or -ENOMEM when the port_vstats array is not available.
 */
static int __port_vstats_update(struct bridge_vlan_xstats *stats, u32 add)
{
	struct bridge_vlan_xstats *slot = port_vstats_entry(stats->vid);

	if (unlikely(!slot))
		return -ENOMEM;

	slot->rx_bytes = slot->rx_bytes * add + stats->rx_bytes;
	slot->rx_packets = slot->rx_packets * add + stats->rx_packets;
	slot->tx_bytes = slot->tx_bytes * add + stats->tx_bytes;
	slot->tx_packets = slot->tx_packets * add + stats->tx_packets;

	return 0;
}

/* Initialise the vstats seqlock and allocate the zeroed per-VLAN counter
 * array.  Allocation failure is only logged; port_vstats then stays NULL.
 */
static void port_vstats_init(void)
{
	seqlock_init(&vstats_seqlock);

	port_vstats = kcalloc(VLAN_N_VID, sizeof(*port_vstats), GFP_KERNEL);
	if (port_vstats)
		return;

	pr_warn("port: couldn't allocate vlan array, hw stats are disabled\n");
}

/* Copy the stored counters for vstore->vid into @vstore.
 *
 * The copy is retried until it completes without a concurrent seqlock
 * writer.  When no counter array exists, @vstore is zeroed instead.
 */
void port_vstats_fetch(struct bridge_vlan_xstats *vstore)
{
	struct bridge_vlan_xstats *slot;
	unsigned int start;

	slot = port_vstats_entry(vstore->vid);
	if (unlikely(!slot)) {
		memset(vstore, 0, sizeof(*vstore));
		return;
	}

	for (;;) {
		start = read_seqbegin(&vstats_seqlock);
		memcpy(vstore, slot, sizeof(*vstore));
		if (!read_seqretry(&vstats_seqlock, start))
			break;
	}
}
EXPORT_SYMBOL_GPL(port_vstats_fetch);

/* Zero the stored counters for @vid by applying an all-zero update with
 * a multiplier of 0.
 */
void port_vstats_reset(u16 vid)
{
	struct bridge_vlan_xstats zeroed = { .vid = vid };
	const u32 overwrite = 0;

	__port_vstats_update(&zeroed, overwrite);
}
EXPORT_SYMBOL_GPL(port_vstats_reset);

/* Report whether port_vlan_update_stats() has accepted at least one
 * update (it is the only writer of port_vlan_stats_valid in this file).
 */
bool port_vstats_valid(void)
{
	return port_vlan_stats_valid;
}
EXPORT_SYMBOL_GPL(port_vstats_valid);

/* must be invoked with rcu_read_lock  or port_cache_lock */
static inline struct port_node *__port_cache_get(int ifindex)
{
	struct port_node *port;

	hash_for_each_possible_rcu(port_cache, port, hash_node, ifindex)
		if (port->ifindex == ifindex)
			return port;

	return NULL;
}

/* Create an empty cache entry for @ifindex unless one already exists.
 * Allocation failure is silent: the port simply stays uncached.
 */
static void __port_cache_init(int ifindex)
{
	struct port_node *node;
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	if (__port_cache_get(ifindex))
		goto unlock;

	node = kzalloc(sizeof(*node), GFP_ATOMIC);
	if (!node)
		goto unlock;

	node->ifindex = ifindex;
	hash_add_rcu(port_cache, &node->hash_node, ifindex);

unlock:
	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* RCU callback: release a port_node and the buffers it owns. */
static void __port_cache_free(struct rcu_head *head)
{
	struct port_node *node = container_of(head, struct port_node, rcu);

	kfree(node->stat_strings);
	kfree(node->stat_data_old);
	kfree(node->stat_data);
	kfree(node);
}

/* Unhash the cache entry for @ifindex, if any, and hand it to call_rcu()
 * so it is freed by __port_cache_free() after a grace period.
 */
static void __port_cache_uninit(int ifindex)
{
	struct port_node *node;
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);
	node = __port_cache_get(ifindex);
	if (node)
		hash_del_rcu(&node->hash_node);
	spin_unlock_irqrestore(&port_cache_lock, irqflags);

	if (node)
		call_rcu(&node->rcu, __port_cache_free);
}

/* Return the cached entry count of string set @sset for @ifindex.
 *
 * Returns -EINVAL when @sset is outside ETH_SS_TEST..ETH_SS_FEATURES and
 * 0 when the port is not cached (or nothing was stored yet).
 */
static int port_cache_get_sset_count(int ifindex, int sset)
{
	struct port_node *node;
	int cached = 0;

	if (sset > ETH_SS_FEATURES || sset < ETH_SS_TEST)
		return -EINVAL;

	rcu_read_lock();
	node = __port_cache_get(ifindex);
	if (node)
		cached = node->sset_count[sset];
	rcu_read_unlock();

	return cached;
}

/* Store @count as the size of string set @sset on @port.  A NULL @port or
 * an @sset outside ETH_SS_TEST..ETH_SS_FEATURES is ignored.
 */
static void __port_cache_set_sset_count(struct port_node *port, int sset,
					int count)
{
	if (!port)
		return;

	if (sset >= ETH_SS_TEST && sset <= ETH_SS_FEATURES)
		port->sset_count[sset] = count;
}

/* Locked wrapper around __port_cache_set_sset_count(): looks the port up
 * by @ifindex under port_cache_lock and records @count for @sset.
 */
static void port_cache_set_sset_count(int ifindex, int sset, int count)
{
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);
	__port_cache_set_sset_count(__port_cache_get(ifindex), sset, count);
	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Copy @count cached strings (ETH_GSTRING_LEN bytes each) for @ifindex
 * into @strings.
 *
 * @strings is always zeroed first.  Returns 0 when cached strings were
 * copied, -ENODATA when the port or its strings are not cached.
 */
static int port_cache_get_stat_strings(int ifindex, int count, u8 *strings)
{
	struct port_node *node;
	u8 *cached = NULL;
	int ret = -ENODATA;

	memset(strings, 0, count * ETH_GSTRING_LEN);

	rcu_read_lock();
	node = __port_cache_get(ifindex);
	if (node)
		cached = rcu_dereference(node->stat_strings);
	if (cached) {
		memcpy(strings, cached, count * ETH_GSTRING_LEN);
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}

/* Replace the cached strings of @ifindex with a private copy of @count
 * strings taken from @strings.
 *
 * The previous buffer is freed only after synchronize_rcu(), so this
 * function may sleep.  Nothing changes when the port is not cached or the
 * copy cannot be allocated.
 */
static void port_cache_set_stat_strings(int ifindex, int count, u8 *strings)
{
	struct port_node *node;
	u8 *stale, *fresh;
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	node = __port_cache_get(ifindex);
	if (!node)
		goto unlock;

	fresh = kmalloc(count * ETH_GSTRING_LEN, GFP_ATOMIC);
	if (!fresh)
		goto unlock;

	memcpy(fresh, strings, count * ETH_GSTRING_LEN);

	stale = node->stat_strings;
	rcu_assign_pointer(node->stat_strings, fresh);
	spin_unlock_irqrestore(&port_cache_lock, irqflags);

	/* wait for readers still looking at the old buffer */
	synchronize_rcu();
	kfree(stale);
	return;

unlock:
	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Make the cached counters of @ifindex read back as zero.
 *
 * Readers report stat_data + stat_data_old, so the snapshot is set to
 * the negation of the current counters rather than touching stat_data.
 * Does nothing when the port or its counters are absent, or when the
 * snapshot buffer cannot be allocated.
 */
static void port_cache_clear_stats(int ifindex)
{
	struct port_node *node;
	unsigned long irqflags;
	int idx;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	node = __port_cache_get(ifindex);
	if (!node || !node->stat_data)
		goto unlock;

	if (!node->stat_data_old)
		rcu_assign_pointer(node->stat_data_old,
				   kzalloc(node->stat_data_count * sizeof(u64),
					   GFP_ATOMIC));
	if (!node->stat_data_old)
		goto unlock;

	for (idx = 0; idx < node->stat_data_count; idx++)
		node->stat_data_old[idx] = -node->stat_data[idx];

unlock:
	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Fill @data with the cached counters of @ifindex.
 *
 * At most min(stats->n_stats, cached count) entries are written: the
 * latest counters are copied and the snapshot offsets, if any, are added
 * on top.  @data is left untouched when the port is not cached.  When
 * @clear is non-zero the cached counters are reset afterwards.
 */
static void port_cache_get_stats(int ifindex, struct ethtool_stats *stats,
				 u64 *data, int clear)
{
	u64 *latest, *offsets;
	struct port_node *node;
	__u32 limit;
	int idx;

	rcu_read_lock();
	node = __port_cache_get(ifindex);
	if (!node)
		goto unlock;

	latest = rcu_dereference(node->stat_data);
	offsets = rcu_dereference(node->stat_data_old);
	limit = min_t(__u32, stats->n_stats, node->stat_data_count);

	if (latest)
		memcpy(data, latest, limit * sizeof(u64));

	if (offsets) {
		for (idx = 0; idx < limit; idx++)
			data[idx] += offsets[idx];
	}

unlock:
	rcu_read_unlock();

	if (clear)
		port_cache_clear_stats(ifindex);
}

/* Store @count counters from @data as the latest stats of @ifindex.
 *
 * The counter buffer is allocated on first use; a later call with a
 * different @count is rejected (debug message only).  @count is also
 * recorded as the ETH_SS_STATS string-set size.  When @restart is set
 * and counters already exist, they are first accumulated into the
 * snapshot (except index ETH_STAT_HwIfOutQLen_idx) so that readers keep
 * seeing the pre-restart totals.
 */
static void port_cache_set_stats(int ifindex, int count, u64 *data, int restart)
{
	struct port_node *node;
	unsigned long irqflags;
	int idx;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	node = __port_cache_get(ifindex);
	if (!node)
		goto unlock;

	if (node->stat_data && node->stat_data_count != count) {
		pr_debug("device %d: trying to change stats count %d -> %d\n",
			 node->ifindex, node->stat_data_count, count);
		goto unlock;
	}
	__port_cache_set_sset_count(node, ETH_SS_STATS, count);

	if (restart && node->stat_data) {
		if (!node->stat_data_old)
			rcu_assign_pointer(node->stat_data_old,
					   kcalloc(count, sizeof(u64),
						   GFP_ATOMIC));
		if (node->stat_data_old) {
			for (idx = 0; idx < count; idx++) {
				if (idx != ETH_STAT_HwIfOutQLen_idx)
					node->stat_data_old[idx] +=
						node->stat_data[idx];
			}
		}
	}

	if (!node->stat_data) {
		node->stat_data_count = count;
		rcu_assign_pointer(node->stat_data,
				   kmalloc(count * sizeof(u64), GFP_ATOMIC));
	}

	if (node->stat_data)
		memcpy(node->stat_data, data, count * sizeof(u64));

unlock:
	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Copy the cached link settings of @ifindex into @link_ksettings.
 *
 * Returns 0 when valid settings were copied, -ENODATA when the port is
 * not cached or no settings were stored yet (output left untouched).
 */
static int port_cache_get_settings(int ifindex,
				   struct ethtool_link_ksettings *link_ksettings)
{
	struct port_node *node;
	int have = 0;

	rcu_read_lock();
	node = __port_cache_get(ifindex);
	if (!node)
		goto unlock;

	have = node->settings_valid;
	/* pairs with the smp_wmb() in port_cache_set_settings() */
	smp_rmb();
	if (have)
		memcpy(link_ksettings, &node->link_ksettings,
		       sizeof(struct ethtool_link_ksettings));

unlock:
	rcu_read_unlock();

	if (!have)
		return -ENODATA;
	return 0;
}

/* Store a copy of @link_ksettings in the cache entry of @ifindex and mark
 * it valid.  Silently ignored when the port is not cached.
 */
static void port_cache_set_settings(int ifindex,
				    struct ethtool_link_ksettings *link_ksettings)
{
	struct port_node *node;
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	node = __port_cache_get(ifindex);
	if (node) {
		memcpy(&node->link_ksettings, link_ksettings,
		       sizeof(struct ethtool_link_ksettings));

		/* publish the data before the valid flag */
		smp_wmb();
		node->settings_valid = 1;
	}

	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Copy the cached FEC parameters of @ifindex into @fecparam.
 *
 * Returns 0 when the port is cached (even if no FEC data was ever
 * stored, in which case the zero-initialised value is returned) and
 * -ENODATA otherwise.
 */
static int port_cache_get_fec(int ifindex,
			      struct ethtool_fecparam *fecparam)
{
	struct port_node *node;
	int ret = -ENODATA;

	rcu_read_lock();
	node = __port_cache_get(ifindex);
	if (node) {
		smp_rmb();
		*fecparam = node->fecparam;
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}

/* Store a copy of @fecparam in the cache entry of @ifindex.  Silently
 * ignored when the port is not cached.
 */
static void port_cache_set_fec(int ifindex,
			       struct ethtool_fecparam *fecparam)
{
	struct port_node *node;
	unsigned long irqflags;

	spin_lock_irqsave(&port_cache_lock, irqflags);

	node = __port_cache_get(ifindex);
	if (node)
		node->fecparam = *fecparam;

	spin_unlock_irqrestore(&port_cache_lock, irqflags);
}

/* Attribute policy shared by every operation in port_ops.
 *
 * Attributes read by handlers in this file but without an entry here:
 * MC_ATTR_ARRAY, MC_ATTR_VRF_ARRAY and PORT_VSTATS_ATTR_ARRAY; the
 * handlers check those lengths themselves.
 *
 * NOTE(review): PORT_ATTR_STAT is declared NLA_U32 while port_set_stats()
 * reads each nested stat with nla_get_u64() -- confirm the wire format.
 * NOTE(review): the MC_ATTR_* and PORT_VSTATS_ATTR_* ids index the same
 * array as PORT_ATTR_*, so they presumably share one numbering space
 * bounded by PORT_ATTR_MAX -- confirm against linux/port.h.
 */
static const struct nla_policy port_policy[PORT_ATTR_MAX + 1] = {
	[PORT_ATTR_STATUS]		= { .type = NLA_U32 },
	[PORT_ATTR_IFINDEX]		= { .type = NLA_U32 },
	[PORT_ATTR_FLAGS]		= { .type = NLA_U32 },
	[PORT_ATTR_CARRIER]		= { .type = NLA_U8 },
	[PORT_ATTR_PHYS_ID_STATE]	= { .type = NLA_U8 },
	[PORT_ATTR_SETTINGS]		= { .type = NLA_BINARY,
		.len = sizeof(struct ethtool_link_ksettings) },
	[PORT_ATTR_PAUSE]		= { .type = NLA_BINARY,
		.len = sizeof(struct ethtool_pauseparam) },
	[PORT_ATTR_MODINFO]		= { .type = NLA_BINARY,
		.len = sizeof(struct ethtool_modinfo) },
	[PORT_ATTR_EEPROM]		= { .type = NLA_BINARY,
		.len = sizeof(struct ethtool_eeprom) },
	[PORT_ATTR_EEPROM_DATA]		= { .type = NLA_BINARY },
	[PORT_ATTR_STATS]		= { .type = NLA_NESTED },
	[PORT_ATTR_STAT]		= { .type = NLA_U32 },
	[PORT_ATTR_STRINGS]		= { .type = NLA_NESTED },
	[PORT_ATTR_STRING]		= { .type = NLA_STRING,
		.len = ETH_GSTRING_LEN },
	[PORT_ATTR_SSET]		= { .type = NLA_U32 },
	[PORT_ATTR_SSET_COUNT]		= { .type = NLA_U32 },
	[PORT_ATTR_FEC]		= { .type = NLA_BINARY,
		.len = sizeof(struct ethtool_fecparam) },
	[MC_ATTR_FAMILY]		= { .type = NLA_U16 },
	[PORT_VSTATS_ATTR_OP]		= { .type = NLA_U32 },
};

/* The family's single multicast group; port_send() multicasts every
 * request to group index 0, i.e. this entry.
 */
static struct genl_multicast_group port_mcgrp[] = {
	{ .name = "port_mc", },
};

/* Requests currently waiting for a reply (struct wq), guarded by
 * port_wq_lock.
 */
static LIST_HEAD(wq_list);

/* One in-flight request waiting in port_wait() for a PORT_CMD_REPLY. */
struct wq {
	wait_queue_head_t wq;	/* the requester sleeps here */
	int seq;		/* sequence number matched by find_wq() */
	int hit;		/* set to 1 by port_reply(); the wake-up condition */
	/* optional callback run by port_reply() on a zero-status reply */
	int (*decode)(struct nlattr **attrs, void *arg1, void *arg2);
	void *arg1, *arg2;	/* opaque arguments passed to decode */
	int status;		/* non-zero PORT_ATTR_STATUS from the reply */
	struct list_head list;	/* linkage in wq_list */
};

static struct wq *find_wq(int seq)
{
	struct list_head *pos;
	struct wq *wq;

	list_for_each(pos, &wq_list) {
		wq = list_entry(pos, struct wq, list);
		if (wq->seq == seq)
			return wq;
	}

	return NULL;
}

/* Allocate a zeroed waiter for sequence number @seq with its wait queue
 * and list node initialised.  Returns NULL on allocation failure.
 */
static struct wq *alloc_wq(int seq)
{
	struct wq *waiter;

	waiter = kzalloc(sizeof(*waiter), GFP_ATOMIC);
	if (waiter) {
		init_waitqueue_head(&waiter->wq);
		INIT_LIST_HEAD(&waiter->list);
		waiter->seq = seq;
	}

	return waiter;
}

/* Append @size bytes at @src to @skb as attribute @attrtype.
 * Returns 0, or -EMSGSIZE when nla_put() fails.
 */
static int encode_struct(struct sk_buff *skb, int attrtype,
			 size_t size, void *src)
{
	return nla_put(skb, attrtype, size, src) ? -EMSGSIZE : 0;
}

/* Copy a fixed-size structure out of a netlink attribute.
 *
 * @attr: attribute to read, may be NULL
 * @size: number of bytes expected and copied
 * @dst:  destination buffer of at least @size bytes
 *
 * The NLA_BINARY policy entries only cap the attribute length, so the
 * payload may be shorter than @size; that case is rejected here instead
 * of reading past the end of the attribute.
 *
 * Returns 0 on success, -ENODEV when @attr is NULL and -EINVAL when the
 * payload is shorter than @size (@dst is then left untouched).
 */
static int decode_struct(struct nlattr *attr, size_t size, void *dst)
{
	int len;

	if (!attr)
		return -ENODEV;

	len = nla_len(attr);
	if (len < 0 || (size_t)len < size)
		return -EINVAL;

	memcpy(dst, nla_data(attr), size);
	return 0;
}

/* Sleep until the reply for request @seq arrives, or give up.
 *
 * @seq:    sequence number of the request already sent
 * @wait:   timeout in seconds
 * @decode: optional callback port_reply() runs on a successful reply
 * @arg1:   opaque argument handed to @decode
 * @arg2:   opaque argument handed to @decode
 *
 * Must be called with the rtnl lock held; it is dropped while sleeping
 * and re-taken before returning.
 *
 * Returns 0 when a reply with zero status arrived, the reply's non-zero
 * status, -ETIMEDOUT on timeout, -ERESTARTSYS when a signal interrupted
 * the wait, or -ENOMEM.
 */
static int port_wait(int seq, int wait,
		     int (*decode)(struct nlattr **attrs,
				   void *arg1, void *arg2),
		     void *arg1, void *arg2)
{
	struct wq *wq;
	long remaining;
	int err;

	wq = alloc_wq(seq);
	if (!wq)
		return -ENOMEM;

	/* fully initialise the waiter before it becomes visible */
	wq->decode = decode;
	wq->arg1 = arg1;
	wq->arg2 = arg2;

	write_lock(&port_wq_lock);
	list_add(&wq->list, &wq_list);
	write_unlock(&port_wq_lock);

	/* Unconditionally rtnl_unlock here as we should not be here
	 * with rtnl not held
	 */
	ASSERT_RTNL();
	rtnl_unlock();

	remaining = wait_event_interruptible_timeout(wq->wq, wq->hit,
						     wait * HZ);

	rtnl_lock(); /* re-grab rtnl lock */

	/* Unlink first: once the write lock has been held, no port_reply()
	 * can still be touching this waiter, so status is stable below.
	 */
	write_lock(&port_wq_lock);
	list_del(&wq->list);
	write_unlock(&port_wq_lock);

	if (remaining == 0)
		err = -ETIMEDOUT;
	else if (remaining < 0)
		err = remaining;	/* interrupted: no reply was seen */
	else
		err = wq->status;

	kfree(wq);

	return err;
}

/* Build a @cmd message for @dev and multicast it on the port family.
 *
 * The message carries PORT_ATTR_IFINDEX plus whatever @encode appends
 * (@size is the payload room @encode needs).  @decode is accepted but
 * not used here.
 *
 * Returns the genlmsg_multicast() result, -ENOMEM when no skb could be
 * allocated, -EMSGSIZE when the header or ifindex does not fit, or the
 * negative value returned by @encode.
 */
static int port_send(struct net_device *dev, u8 cmd, int seq, int size,
		     int (*encode)(struct sk_buff *skb,
				   void *arg1,
				   void *arg2),
		     int (*decode)(struct nlattr **attrs,
				   void *arg1,
				   void *arg2),
		     void *arg1, void *arg2)
{
	struct sk_buff *msg;
	void *genl_hdr;
	int ret = -EMSGSIZE;

	size += nla_total_size(sizeof(u32));    /* PORT_ATTR_IFINDEX */

	msg = genlmsg_new(size, GFP_ATOMIC);
	if (!msg)
		return -ENOMEM;

	genl_hdr = genlmsg_put(msg, 0, seq, &port_family, 0, cmd);
	if (!genl_hdr)
		goto free_msg;

	if (nla_put_u32(msg, PORT_ATTR_IFINDEX, dev->ifindex))
		goto cancel_msg;

	if (encode) {
		ret = encode(msg, arg1, arg2);
		if (ret < 0)
			goto cancel_msg;
	}

	genlmsg_end(msg, genl_hdr);

	/* the skb is not freed here once handed to genlmsg_multicast() */
	return genlmsg_multicast(&port_family, msg, 0, 0, GFP_ATOMIC);

cancel_msg:
	genlmsg_cancel(msg, genl_hdr);
free_msg:
	nlmsg_free(msg);
	return ret;
}

/* PORT_CMD_REPLY handler: complete the waiter whose sequence number
 * matches the reply.
 *
 * When PORT_ATTR_STATUS is present, a zero status runs the waiter's
 * decode callback and a non-zero status is recorded for port_wait().
 * The waiter is woken in both cases.  Replies with no matching waiter
 * are ignored.  Always returns 0.
 */
static int port_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *status_attr = info->attrs[PORT_ATTR_STATUS];
	struct wq *waiter;
	int status;

	read_lock(&port_wq_lock);

	waiter = find_wq(info->snd_seq);
	if (!waiter)
		goto unlock;

	waiter->hit = 1;

	if (status_attr) {
		status = nla_get_u32(status_attr);
		if (status)
			waiter->status = status;
		else if (waiter->decode)
			waiter->decode(info->attrs, waiter->arg1,
				       waiter->arg2);
	}

	wake_up_interruptible(&waiter->wq);

unlock:
	read_unlock(&port_wq_lock);

	return 0;
}

static int port_set_stats(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	int ifindex = 0, count = 0;
	u32 flags = 0;
	struct nlattr *attr;
	u64 *data;
	int i = 0, rem, restart;

	if (attrs[PORT_ATTR_IFINDEX])
		ifindex = nla_get_u32(attrs[PORT_ATTR_IFINDEX]);

	if (attrs[PORT_ATTR_FLAGS])
		flags = nla_get_u32(attrs[PORT_ATTR_FLAGS]);

	restart = !!(flags & PORT_ATTR_FLAG_STAT_RESTART);

	if (attrs[PORT_ATTR_SSET_COUNT])
		count = nla_get_u32(attrs[PORT_ATTR_SSET_COUNT]);

	if (!count)
		return -EINVAL;

	data = kcalloc(count, sizeof(u64), GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	if (attrs[PORT_ATTR_STATS])
		nla_for_each_nested(attr, attrs[PORT_ATTR_STATS], rem)
			if (i < count)
				data[i++] = nla_get_u64(attr);

	port_cache_set_stats(ifindex, count, data, restart);

	kfree(data);

	return 0;
}

static int port_set_settings_push(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct ethtool_link_ksettings link_ksettings = { { 0 }, };
	int ifindex = 0;

	if (attrs[PORT_ATTR_IFINDEX])
		ifindex = nla_get_u32(attrs[PORT_ATTR_IFINDEX]);

	if (attrs[PORT_ATTR_SETTINGS])
		memcpy(&link_ksettings, nla_data(attrs[PORT_ATTR_SETTINGS]),
		       sizeof(struct ethtool_link_ksettings));

	if (ifindex <= 0)
		return -ENOTSUPP;

	port_cache_set_settings(ifindex, &link_ksettings);

	return 0;
}

/* PORT_CMD_SET_CARRIER handler: switch a device's carrier on or off.
 *
 * The device is looked up by PORT_ATTR_IFINDEX in the calling task's
 * network namespace.  With PORT_ATTR_FLAG_CARRIER_SET_QUEUE set, the tx
 * queue is started along with carrier-on (only if the device is running)
 * and tx is disabled before carrier-off.
 *
 * Returns -ENOTSUPP when the ifindex is missing or not positive, and 0
 * otherwise (including when no such device exists).
 */
static int port_set_carrier(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *idx_attr = info->attrs[PORT_ATTR_IFINDEX];
	struct nlattr *carrier_attr = info->attrs[PORT_ATTR_CARRIER];
	struct nlattr *flags_attr = info->attrs[PORT_ATTR_FLAGS];
	int ifindex = 0, carrier = 0;
	struct net_device *dev;
	struct net *net;
	bool setqueues;
	u32 flags = 0;

	if (idx_attr)
		ifindex = nla_get_u32(idx_attr);
	if (ifindex <= 0)
		return -ENOTSUPP;

	if (carrier_attr)
		carrier = nla_get_u8(carrier_attr);
	if (flags_attr)
		flags = nla_get_u32(flags_attr);

	setqueues = !!(flags & PORT_ATTR_FLAG_CARRIER_SET_QUEUE);

	net = get_net(current->nsproxy->net_ns);

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		goto out_put_net;

	netdev_dbg(dev, "%s: setting carrier %s, flags (setqueues %d) (netif_running %s)\n",
		   __func__, (carrier ? "on" : "off"), setqueues,
		   netif_running(dev) ? "yes" : "no");

	if (!carrier) {
		if (setqueues)
			netif_tx_disable(dev);
		netif_carrier_off(dev);
	} else {
		netif_carrier_on(dev);
		if (setqueues && netif_running(dev))
			netif_start_queue(dev);
		else if (setqueues)
			netdev_warn(dev, "%s called with setqueues when device is not running\n", __func__);
	}

	dev_put(dev);

out_put_net:
	put_net(net);
	return 0;
}

static int port_send_cached_settings(int ifindex,
				     struct genl_info *info,
				     struct ethtool_link_ksettings *link_ksettings,
				     int status)
{
	struct sk_buff *skb;
	int err = 0, size = 0;
	void *hdr;

	/* Uses unicast reply to requester. This function can be
	 * generalized
	 */
	size += nla_total_size(sizeof(u32)) +
			nla_total_size(sizeof(u32)) +
			nla_total_size(sizeof(struct ethtool_link_ksettings));

	skb = genlmsg_new(size, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &port_family,
			  0, PORT_CMD_GET_CACHED_SETTINGS);
	if (!hdr)
		goto err_out;

	if (nla_put_u32(skb, PORT_ATTR_IFINDEX, ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, PORT_ATTR_STATUS, status))
		goto nla_put_failure;

	err = encode_struct(skb, PORT_ATTR_SETTINGS,
			    sizeof(struct ethtool_link_ksettings), link_ksettings);
	if (err < 0) {
nla_put_failure:
		genlmsg_cancel(skb, hdr);
		goto err_out;
	}

	genlmsg_end(skb, hdr);

	err = genlmsg_reply(skb, info);
	if (err < 0)
		goto err_out_sent;

	return err;

err_out:
	nlmsg_free(skb);

err_out_sent:
	return err;
}

static int port_cache_settings_pull(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct ethtool_link_ksettings link_ksettings = { { 0 }, };
	int err = 0, ifindex = 0;

	if (attrs[PORT_ATTR_IFINDEX])
		ifindex = nla_get_u32(attrs[PORT_ATTR_IFINDEX]);

	if (ifindex <= 0)
		return -ENOTSUPP;

	err = port_cache_get_settings(ifindex, &link_ksettings);

	return port_send_cached_settings(ifindex, info, &link_ksettings, err);
}

/* Accept a push request via netlink and store fec in the kernel cache */
static int port_set_fec_push(struct sk_buff *skb, struct genl_info *info)
{
    struct nlattr **attrs = info->attrs;
    struct ethtool_fecparam fecparam = { 0 };
    int ifindex = 0;

    if (attrs[PORT_ATTR_IFINDEX])
        ifindex = nla_get_u32(attrs[PORT_ATTR_IFINDEX]);

    if (ifindex <= 0)
        return -ENOTSUPP;

    if (attrs[PORT_ATTR_FEC])
        memcpy(&fecparam, nla_data(attrs[PORT_ATTR_FEC]),
               sizeof(struct ethtool_fecparam));

    port_cache_set_fec(ifindex, &fecparam);

    return 0;
}

static int port_send_cached_fec(int ifindex,
				struct genl_info *info,
				struct ethtool_fecparam *fecparam,
				int status)
{
	struct sk_buff *skb;
	int err = 0, size = 0;
	void *hdr;

	/* Uses unicast reply to requester. This function can be
	 * generalized
	 */
	size += nla_total_size(sizeof(u32)) +
			nla_total_size(sizeof(u32)) +
			nla_total_size(sizeof(struct ethtool_fecparam));

	skb = genlmsg_new(size, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &port_family,
			  0, PORT_CMD_GET_CACHED_FEC);
	if (!hdr)
		goto err_out;

	if (nla_put_u32(skb, PORT_ATTR_IFINDEX, ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, PORT_ATTR_STATUS, status))
		goto nla_put_failure;

	err = encode_struct(skb, PORT_ATTR_FEC,
			    sizeof(struct ethtool_fecparam), fecparam);
	if (err < 0) {
nla_put_failure:
		genlmsg_cancel(skb, hdr);
		goto err_out;
	}

	genlmsg_end(skb, hdr);

	err = genlmsg_reply(skb, info);
	if (err < 0)
		goto err_out_sent;

	return err;

err_out:
	nlmsg_free(skb);

err_out_sent:
	return err;
}

static int port_cache_fec_pull(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct ethtool_fecparam fecparam = { 0 };
	int err = 0, ifindex = 0;

	if (attrs[PORT_ATTR_IFINDEX])
		ifindex = nla_get_u32(attrs[PORT_ATTR_IFINDEX]);

	if (ifindex <= 0)
		return -ENOTSUPP;

	err = port_cache_get_fec(ifindex, &fecparam);

	return port_send_cached_fec(ifindex, info, &fecparam, err);
}

/* MC_CMD_REFRESH_AGE handler: refresh the age of multicast route entries.
 *
 * Exactly one of MC_ATTR_ARRAY (array of union mc_addr, table 0) or
 * MC_ATTR_VRF_ARRAY (array of struct mc_addr_vrf, each naming its table)
 * must be supplied together with MC_ATTR_FAMILY.  Each element is handed
 * to ipmr_refresh_age() or, when IPv6 is enabled, to the ip6mr stub,
 * depending on the family; elements of any other family are skipped.
 *
 * Returns 0, or -EINVAL when the attributes are missing, both arrays are
 * given, or an array length is not a multiple of its element size.
 */
static int mc_refresh_age(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct mc_addr_vrf *mcad_vrf;
	union mc_addr *mcad;
	__be16 family;
	int rem, i;

	if (!attrs[MC_ATTR_FAMILY] ||
	    (!attrs[MC_ATTR_ARRAY] && !attrs[MC_ATTR_VRF_ARRAY]))
		return -EINVAL;

	if (attrs[MC_ATTR_ARRAY] && attrs[MC_ATTR_VRF_ARRAY])
		return -EINVAL;

	family = nla_get_be16(attrs[MC_ATTR_FAMILY]);
	if (attrs[MC_ATTR_ARRAY]) {
		if (nla_len(attrs[MC_ATTR_ARRAY]) % sizeof(union mc_addr) != 0)
			return -EINVAL;

		mcad = nla_data(attrs[MC_ATTR_ARRAY]);
		for (rem = 0, i = 0; rem < nla_len(attrs[MC_ATTR_ARRAY]);
		     rem += sizeof(*mcad), i++) {
			/* The closing brace of the switch must stay outside
			 * the #if so the file still builds without IPv6.
			 */
			switch (family) {
			case htons(ETH_P_IP):
				ipmr_refresh_age(0, &mcad[i]);
				break;
#if IS_ENABLED(CONFIG_IPV6)
			case htons(ETH_P_IPV6):
				ipv6_stub->ip6mr_refresh_age(0, &mcad[i]);
				break;
#endif
			default:
				break;
			}
		}
	} else if (attrs[MC_ATTR_VRF_ARRAY]) {
		struct nlattr *attr;

		attr = attrs[MC_ATTR_VRF_ARRAY];
		if (nla_len(attr) % sizeof(struct mc_addr_vrf) != 0)
			return -EINVAL;

		mcad_vrf = nla_data(attr);
		for (rem = 0, i = 0; rem < nla_len(attr);
		     rem += sizeof(*mcad_vrf), i++) {
			switch (family) {
			case htons(ETH_P_IP):
				ipmr_refresh_age(mcad_vrf[i].table,
						 &mcad_vrf[i].mcaddr);
				break;
#if IS_ENABLED(CONFIG_IPV6)
			case htons(ETH_P_IPV6):
				ipv6_stub->ip6mr_refresh_age(mcad_vrf[i].table,
							     &mcad_vrf[i].mcaddr);
				break;
#endif
			default:
				break;
			}
		}
	}

	return 0;
}

/* PORT_CMD_UPDATE_VSTATS handler: apply an array of per-VLAN counter
 * updates.
 *
 * PORT_VSTATS_ATTR_ARRAY holds one or more struct bridge_vlan_xstats.
 * PORT_VSTATS_ATTR_OP is the multiplier applied to the stored counters
 * before the incoming value is added (default 1 = accumulate, 0 =
 * overwrite; see __port_vstats_update()).  The seqlock is dropped every
 * 512 entries to give the scheduler a chance to run.
 *
 * Returns 0 on success, -EINVAL when the array is missing, empty or not
 * a whole number of elements, and -ENOMEM when the counter array was
 * never allocated.
 */
static int port_vlan_update_stats(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *array = info->attrs[PORT_VSTATS_ATTR_ARRAY];
	struct nlattr *op = info->attrs[PORT_VSTATS_ATTR_OP];
	struct bridge_vlan_xstats *vstats;
	unsigned int len, count, i;
	u32 add = 1;

	if (!array)
		return -EINVAL;

	/* A trailing partial element would otherwise be read past the end
	 * of the attribute.
	 */
	len = nla_len(array);
	if (len < sizeof(*vstats) || len % sizeof(*vstats))
		return -EINVAL;
	count = len / sizeof(*vstats);

	if (unlikely(!port_vstats))
		return -ENOMEM;

	if (op)
		add = nla_get_u32(op);

	port_vlan_stats_valid = true;
	write_seqlock_bh(&vstats_seqlock);
	vstats = nla_data(array);
	for (i = 0; i < count; i++) {
		/* update 512 entries at a time */
		if (i && (i % 512) == 0) {
			write_sequnlock_bh(&vstats_seqlock);
			cond_resched();
			write_seqlock_bh(&vstats_seqlock);
		}
		__port_vstats_update(&vstats[i], add);
	}
	write_sequnlock_bh(&vstats_seqlock);

	return 0;
}

/* Commands accepted from userspace on the port family.  Every entry uses
 * port_policy for attribute validation.
 *
 * NOTE(review): no entry sets .flags, so none of these operations asks
 * genetlink for a privilege check (e.g. GENL_ADMIN_PERM) -- confirm that
 * is intended, since the handlers change carrier state and cached data.
 */
static struct genl_ops port_ops[] = {
	{
		/* reply to a request previously multicast by port_send() */
		.cmd = PORT_CMD_REPLY,
		.policy = port_policy,
		.doit = port_reply,
	},
	{
		/* push counters into the cache */
		.cmd = PORT_CMD_SET_STATS,
		.policy = port_policy,
		.doit = port_set_stats,
	},
	{
		/* push link settings into the cache */
		.cmd = PORT_CMD_SET_SETTINGS,
		.policy = port_policy,
		.doit = port_set_settings_push,
	},
	{
		/* set carrier (and optionally tx queue) state */
		.cmd = PORT_CMD_SET_CARRIER,
		.policy = port_policy,
		.doit = port_set_carrier,
	},
        {
                /* push FEC parameters into the cache */
                .cmd = PORT_CMD_SET_FEC,
                .policy = port_policy,
                .doit = port_set_fec_push,
        },
	{
		/* read back cached link settings */
		.cmd = PORT_CMD_GET_CACHED_SETTINGS,
		.policy = port_policy,
		.doit = port_cache_settings_pull,
	},
	{
		/* read back cached FEC parameters */
		.cmd = PORT_CMD_GET_CACHED_FEC,
		.policy = port_policy,
		.doit = port_cache_fec_pull,
	},
	{
		/* refresh multicast route ages */
		.cmd = MC_CMD_REFRESH_AGE,
		.policy = port_policy,
		.doit = mc_refresh_age,
	},
	{
		/* update per-VLAN counters */
		.cmd = PORT_CMD_UPDATE_VSTATS,
		.policy = port_policy,
		.doit = port_vlan_update_stats,
	},
};

/* Create the port cache entry for @dev (keyed by dev->ifindex) if it does
 * not exist yet.  Allocation failure is not reported to the caller.
 */
void port_init_ethtool_stats(struct net_device *dev)
{
	__port_cache_init(dev->ifindex);
}
EXPORT_SYMBOL_GPL(port_init_ethtool_stats);

/* Remove the port cache entry for @dev, if any; the entry itself is freed
 * after an RCU grace period.
 */
void port_uninit_ethtool_stats(struct net_device *dev)
{
	__port_cache_uninit(dev->ifindex);
}
EXPORT_SYMBOL_GPL(port_uninit_ethtool_stats);

/* Send request @cmd for @dev to userspace and optionally wait for the
 * reply.
 *
 * @size:   payload room needed by @encode
 * @wait:   reply timeout in seconds; 0 means fire-and-forget
 * @encode: optional callback that appends the request attributes
 * @decode: optional callback run on the reply (from port_reply())
 * @arg1:   opaque argument for @encode/@decode; for GET_STRINGS and
 *          GET_SSET_COUNT it is also read here as an int (string set id),
 *          for SET_SETTINGS and SET_FEC it is the struct to cache
 * @arg2:   opaque argument for @encode/@decode; for GET_STRINGS it is the
 *          strings buffer that gets cached
 *
 * A device reference is held for the duration of the call.  After a
 * successful exchange the relevant cache is updated for some commands.
 *
 * Returns a negative errno when sending or waiting fails, otherwise the
 * value left in err (see the notes below).
 *
 * NOTE(review): the request is multicast before port_wait() registers
 * the waiter, so a reply that arrives in between finds no waiter and is
 * dropped -- confirm whether userspace can answer that quickly.
 * NOTE(review): scount is the return value of port_send(), i.e. of
 * genlmsg_multicast(); the GET_SSET_COUNT case caches that value as the
 * string-set count, and the count produced by the decode callback is
 * discarded by port_reply() -- looks unintended, confirm.
 */
static int handle_remote_ops(struct net_device *dev,  u8 cmd, int size,
			     int wait,
			     int (*encode)(struct sk_buff *skb,
					   void *arg1,
					   void *arg2),
			     int (*decode)(struct nlattr **attrs,
					   void *arg1,
					   void *arg2),
			     void *arg1, void *arg2)
{
	int err = 0, count, scount = 0, seq;
	static atomic_t next_seq;

	/* these follow a simple model
	 * since they can call port_wait we will unlock rtnl_lock
	 * before we call the underlying function
	 * subsequently we will trylock and restart_syscall when
	 * we wake up and process returned data. Only works
	 * for the case where we think the cmd came from ethtool
	 */

	/* use unique seq for each request */
	seq = atomic_inc_return(&next_seq);
	dev_hold(dev);

	scount = port_send(dev, cmd, seq, size,
			   encode, decode/*XXX*/, arg1, arg2);
	if (scount < 0) {
		err = scount;
		goto err_out;
	}

	if (wait) {
		err = port_wait(seq, wait, decode, arg1, arg2);
		if (err < 0)
			goto err_out;
	}

	/* update the cache with what was just sent or received */
	switch (cmd) {
	case PORT_CMD_GET_STRINGS:
		count = port_cache_get_sset_count(dev->ifindex, *(int *)arg1);
		port_cache_set_stat_strings(dev->ifindex, count, arg2);
		break;
	case PORT_CMD_GET_SSET_COUNT:
		err = scount;
		port_cache_set_sset_count(dev->ifindex, *(int *)arg1, scount);
		break;
	case PORT_CMD_SET_SETTINGS:
		port_cache_set_settings(dev->ifindex, arg1);
		break;
	case PORT_CMD_SET_FEC:
		port_cache_set_fec(dev->ifindex, arg1);
		break;
	case PORT_CMD_GET_MODULE_INFO:
	case PORT_CMD_GET_MODULE_EEPROM:
	case PORT_CMD_SET_PHYS_ID_STATE:
	case PORT_CMD_GET_PAUSE:
	case PORT_CMD_SET_PAUSE:
	case PORT_CMD_GET_FEC:
	default:
		/* nothing to see here */
		break;
	}

err_out:
	dev_put(dev);

	return err;
}

/* Fill @link_ksettings from the port cache for @dev.
 *
 * Missing cache data is deliberately not an error: without data from the
 * driver the output is left untouched and 0 is returned, because sets
 * can still go through at that point.
 */
int port_get_link_settings(struct net_device *dev,
			   struct ethtool_link_ksettings *link_ksettings)
{
	int ret = port_cache_get_settings(dev->ifindex, link_ksettings);

	return ret == -ENODATA ? 0 : ret;
}
EXPORT_SYMBOL_GPL(port_get_link_settings);

/* Request encoder for PORT_CMD_SET_SETTINGS: @arg1 points at the
 * struct ethtool_link_ksettings to send; @arg2 is unused.
 */
static int port_settings_encode(struct sk_buff *skb, void *arg1, void *arg2)
{
	return encode_struct(skb, PORT_ATTR_SETTINGS,
			     sizeof(struct ethtool_link_ksettings), arg1);
}

/* Ask userspace to apply @link_ksettings to @dev and wait up to 20
 * seconds for the reply; on success the settings are also cached by
 * handle_remote_ops().
 *
 * The encoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 *
 * Returns the result of handle_remote_ops().
 */
int port_set_link_settings(struct net_device *dev,
			   const struct ethtool_link_ksettings *link_ksettings)
{
	int size = nla_total_size(sizeof(struct ethtool_link_ksettings));
	int wait = 20;

	return handle_remote_ops(dev, PORT_CMD_SET_SETTINGS, size, wait,
				 port_settings_encode, NULL,
				 (struct ethtool_link_ksettings *)link_ksettings,
				 NULL);
}
EXPORT_SYMBOL_GPL(port_set_link_settings);

/* Reply decoder for PORT_CMD_GET_PAUSE: copies PORT_ATTR_PAUSE into the
 * struct ethtool_pauseparam at @arg1; @arg2 is unused.
 */
static int port_pause_decode(struct nlattr **attrs, void *arg1, void *arg2)
{
	return decode_struct(attrs[PORT_ATTR_PAUSE],
			     sizeof(struct ethtool_pauseparam),
			     arg1);
}

/* Ask userspace for the pause parameters of @dev, waiting up to one
 * second.  @pause is only written when a successful reply arrives in
 * time; failures are not reported to the caller.
 *
 * The decoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 */
void port_get_pauseparam(struct net_device *dev,
			 struct ethtool_pauseparam *pause)
{
	int size = 0, wait = 1;

	handle_remote_ops(dev, PORT_CMD_GET_PAUSE, size, wait, NULL,
			  port_pause_decode, pause, NULL);
}
EXPORT_SYMBOL_GPL(port_get_pauseparam);

/* Request encoder for PORT_CMD_SET_PAUSE: @arg1 points at the
 * struct ethtool_pauseparam to send; @arg2 is unused.
 */
static int port_pause_encode(struct sk_buff *skb, void *arg1, void *arg2)
{
	return encode_struct(skb, PORT_ATTR_PAUSE,
			     sizeof(struct ethtool_pauseparam),
			     arg1);
}

/* Ask userspace to apply @pause to @dev and wait up to one second for
 * the reply.
 *
 * The encoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 *
 * Returns the result of handle_remote_ops().
 */
int port_set_pauseparam(struct net_device *dev,
			struct ethtool_pauseparam *pause)
{
	int size = nla_total_size(sizeof(struct ethtool_pauseparam));
	int wait = 1;

/* XXX we want to ensure this handling in switchd is done right */
	return handle_remote_ops(dev, PORT_CMD_SET_PAUSE, size, wait,
				 port_pause_encode, NULL, pause, NULL);
}
EXPORT_SYMBOL_GPL(port_set_pauseparam);

/* Fill @data (stats->n_stats entries) with the cached counters of @dev.
 * Entries that are not cached read as zero.
 */
void port_get_ethtool_stats(struct net_device *dev,
			    struct ethtool_stats *stats,
			    u64 *data)
{
	const int clear = 0;

	memset(data, 0, stats->n_stats * sizeof(u64));
	port_cache_get_stats(dev->ifindex, stats, data, clear);
}
EXPORT_SYMBOL_GPL(port_get_ethtool_stats);

/* Same as port_get_ethtool_stats(), but additionally resets the cached
 * counters of @dev after reading them.
 */
void port_get_ethtool_stats_clear(struct net_device *dev,
				  struct ethtool_stats *stats,
				  u64 *data)
{
	const int clear = 1;

	memset(data, 0, stats->n_stats * sizeof(u64));
	port_cache_get_stats(dev->ifindex, stats, data, clear);
}
EXPORT_SYMBOL_GPL(port_get_ethtool_stats_clear);

void port_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int size = nla_total_size(sizeof(u32)), wait = 30;
	int err, count;

	int encode(struct sk_buff *skb, void *arg1, void *arg2)
	{
		u32 *stringset = arg1;

		if (nla_put_u32(skb, PORT_ATTR_SSET, *stringset))
			return -EMSGSIZE;
		return 0;
	}

	int decode(struct nlattr **attrs, void *arg1, void *arg2)
	{
		u8 *data = arg2;
		struct nlattr *attr;
		int rem;

		if (!attrs[PORT_ATTR_STRINGS])
			return -EINVAL;
		nla_for_each_nested(attr, attrs[PORT_ATTR_STRINGS], rem) {
			memcpy(data, nla_data(attr), ETH_GSTRING_LEN);
			data += ETH_GSTRING_LEN;
		}
		return 0;
	}

	count = port_cache_get_sset_count(dev->ifindex, stringset);
	if (count <= 0)
		return;

	err = port_cache_get_stat_strings(dev->ifindex, count, data);
	if (!err)
		return;

	err = handle_remote_ops(dev, PORT_CMD_GET_STRINGS, size, wait,
				encode, decode, &stringset, data);
	if (err)
		return;
}
EXPORT_SYMBOL_GPL(port_get_strings);

/* Request encoder for PORT_CMD_GET_SSET_COUNT: @arg1 points at the int
 * string set id; @arg2 is unused.
 */
static int port_sset_count_encode(struct sk_buff *skb, void *arg1, void *arg2)
{
	int *sset = arg1;

	if (nla_put_u32(skb, PORT_ATTR_SSET, *sset))
		return -EMSGSIZE;
	return 0;
}

/* Reply decoder for PORT_CMD_GET_SSET_COUNT: returns the value of
 * PORT_ATTR_SSET_COUNT, or -EINVAL when it is missing.
 */
static int port_sset_count_decode(struct nlattr **attrs, void *arg1,
				  void *arg2)
{
	if (!attrs[PORT_ATTR_SSET_COUNT])
		return -EINVAL;
	return nla_get_u32(attrs[PORT_ATTR_SSET_COUNT]);
}

/* Return the number of entries in string set @sset of @dev.
 *
 * A non-negative cached value is returned directly.  Only when the cache
 * lookup itself fails (negative value) is userspace asked, with a 30
 * second timeout, and the original negative value is still returned.
 * NOTE(review): the remote result is never propagated to the caller --
 * confirm whether that is intended.
 *
 * The callbacks are file-scope functions rather than GNU nested
 * functions so the file does not depend on that GCC-only extension.
 */
int port_get_sset_count(struct net_device *dev, int sset)
{
	int size = nla_total_size(sizeof(u32)), wait = 30;
	int count;

	count = port_cache_get_sset_count(dev->ifindex, sset);
	if (count >= 0)
		return count;

	handle_remote_ops(dev, PORT_CMD_GET_SSET_COUNT, size, wait,
			  port_sset_count_encode, port_sset_count_decode,
			  &sset, NULL);
	return count;
}
EXPORT_SYMBOL_GPL(port_get_sset_count);

/* Request encoder for PORT_CMD_SET_PHYS_ID_STATE: @arg1 points at the
 * enum ethtool_phys_id_state to send; @arg2 is unused.
 */
static int port_phys_id_encode(struct sk_buff *skb, void *arg1, void *arg2)
{
	enum ethtool_phys_id_state *state = arg1;

	if (nla_put_u8(skb, PORT_ATTR_PHYS_ID_STATE, *state))
		return -EMSGSIZE;
	return 0;
}

/* Forward a port-identification state change for @dev to userspace.
 *
 * ETHTOOL_ID_ACTIVE is answered locally with 1 (a one second period);
 * every other state is sent without waiting for a reply.
 *
 * The encoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 *
 * Returns 1 for ETHTOOL_ID_ACTIVE, otherwise the result of
 * handle_remote_ops().
 */
int port_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state)
{
	int size = nla_total_size(sizeof(u8));
	int wait = 0;

	if (state == ETHTOOL_ID_ACTIVE)
		return 1;	/* 1 sec period */

	return handle_remote_ops(dev, PORT_CMD_SET_PHYS_ID_STATE, size, wait,
				 port_phys_id_encode, NULL, &state, NULL);
}
EXPORT_SYMBOL_GPL(port_set_phys_id);

/* Reply decoder for PORT_CMD_GET_MODULE_INFO: copies PORT_ATTR_MODINFO
 * into the struct ethtool_modinfo at @arg1; @arg2 is unused.
 */
static int port_modinfo_decode(struct nlattr **attrs, void *arg1, void *arg2)
{
	return decode_struct(attrs[PORT_ATTR_MODINFO],
			     sizeof(struct ethtool_modinfo), arg1);
}

/* Ask userspace for the module information of @dev, waiting up to 30
 * seconds.  @info is only written when a successful reply arrives.
 *
 * The decoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 *
 * Returns the result of handle_remote_ops().
 */
int port_get_module_info(struct net_device *dev, struct ethtool_modinfo *info)
{
	int size = 0, wait = 30;

	return handle_remote_ops(dev, PORT_CMD_GET_MODULE_INFO, size, wait,
				 NULL, port_modinfo_decode, info, NULL);
}
EXPORT_SYMBOL_GPL(port_get_module_info);

int port_get_module_eeprom(struct net_device *dev,
			   struct ethtool_eeprom *eeprom, u8 *data)
{
	int size = nla_total_size(sizeof(struct ethtool_eeprom));
	int wait = 30;

	int encode(struct sk_buff *skb, void *arg1, void *arg2)
	{
		return encode_struct(skb, PORT_ATTR_EEPROM,
				     sizeof(struct ethtool_eeprom),
				     arg1);
	}

	int decode(struct nlattr **attrs, void *arg1, void *arg2)
	{
		if (!attrs[PORT_ATTR_EEPROM_DATA])
			return -EINVAL;
		memcpy(arg2, nla_data(attrs[PORT_ATTR_EEPROM_DATA]),
		       nla_len(attrs[PORT_ATTR_EEPROM_DATA]));
		return 0;
	}

	return handle_remote_ops(dev, PORT_CMD_GET_MODULE_EEPROM, size, wait,
				 encode, decode, eeprom, data);
}
EXPORT_SYMBOL_GPL(port_get_module_eeprom);

/* Fill @fecparam from the port cache for @dev.  A port that is not cached
 * is not treated as an error: @fecparam is left untouched and 0 is
 * returned.
 */
int port_get_fecparam(struct net_device *dev,
		      struct ethtool_fecparam *fecparam)
{
	int ret = port_cache_get_fec(dev->ifindex, fecparam);

	return ret == -ENODATA ? 0 : ret;
}
EXPORT_SYMBOL_GPL(port_get_fecparam);

/* Request encoder for PORT_CMD_SET_FEC: @arg1 points at the
 * struct ethtool_fecparam to send; @arg2 is unused.
 */
static int port_fec_encode(struct sk_buff *skb, void *arg1, void *arg2)
{
	return encode_struct(skb, PORT_ATTR_FEC,
			     sizeof(struct ethtool_fecparam),
			     arg1);
}

/* Ask userspace to apply @fecparam to @dev and wait up to 20 seconds for
 * the reply; on success the parameters are also cached by
 * handle_remote_ops().
 *
 * The encoder is a file-scope function rather than a GNU nested function
 * so the file does not depend on that GCC-only extension.
 *
 * Returns the result of handle_remote_ops().
 */
int port_set_fecparam(struct net_device *dev,
		      struct ethtool_fecparam *fecparam)
{
	int size = nla_total_size(sizeof(struct ethtool_fecparam));
	int wait = 20;

	return handle_remote_ops(dev, PORT_CMD_SET_FEC, size, wait,
				 port_fec_encode, NULL, fecparam, NULL);
}
EXPORT_SYMBOL_GPL(port_set_fecparam);

/* The generic netlink family itself: ties together the operations in
 * port_ops and the multicast group in port_mcgrp.  Registered from
 * port_init().
 */
static struct genl_family port_family = {
	.name	= "port_family",
	.ops	= port_ops,
	.n_ops	= ARRAY_SIZE(port_ops),
	.mcgrps	= port_mcgrp,
	.n_mcgrps = ARRAY_SIZE(port_mcgrp),
	.version = 1,
	.maxattr = PORT_ATTR_MAX,	/* highest attribute id parsed for the ops */
};

/* Late initcall: set up the port cache and the per-VLAN counter array,
 * then register the generic netlink family.
 *
 * A family whose registration failed was never registered, so the error
 * path must not call genl_unregister_family() on it.  The counter array
 * is intentionally kept: the exported port_vstats_*() helpers remain
 * callable regardless of the family.
 *
 * Returns 0 on success or the genl_register_family() error.
 */
static int __init port_init(void)
{
	int err;

	hash_init(port_cache);
	port_vstats_init();

	err = genl_register_family(&port_family);
	if (err)
		return err;

	pr_debug("port family register OK\n");

	return 0;
}
late_initcall(port_init);
