Skip to content
This repository has been archived by the owner on Sep 7, 2020. It is now read-only.

Feature/collect link metrics #831

Merged
merged 14 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions agent/src/beerocks/slave/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ set(MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})

file(GLOB beerocks_agent_sources
${MODULE_PATH}/backhaul_manager/*.c*
# This code should be moved to BPL
# [TASK] Move link metric related classes to BPL #910
${MODULE_PATH}/link_metrics/*.c*
${MODULE_PATH}/platform_manager/*.c*
${MODULE_PATH}/*.c*
)
Expand Down
421 changes: 409 additions & 12 deletions agent/src/beerocks/slave/backhaul_manager/backhaul_manager_thread.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class backhaul_manager : public btl::transport_socket_thread {
bool handle_1905_topology_query(ieee1905_1::CmduMessageRx &cmdu_rx, const std::string &src_mac);
bool handle_1905_higher_layer_data_message(ieee1905_1::CmduMessageRx &cmdu_rx,
const std::string &src_mac);
bool handle_1905_link_metric_query(ieee1905_1::CmduMessageRx &cmdu_rx,
const std::string &src_mac);
bool handle_1905_combined_infrastructure_metrics(ieee1905_1::CmduMessageRx &cmdu_rx,
const std::string &src_mac);
bool handle_ap_capability_query(ieee1905_1::CmduMessageRx &cmdu_rx, const std::string &src_mac);
Expand Down Expand Up @@ -268,6 +270,23 @@ class backhaul_manager : public btl::transport_socket_thread {
*/
std::unordered_map<sMacAddr, sRadioInfo> m_radio_info_map;

/**
* @brief Gets the list of neighbor links from topology database.
*
* Neighbor links are pairs (interface, neighbor) where 'interface' is the name of the interface
* that connects to the neighbor device and 'neighbor' is the MAC address of the neighbor device.
*
* @param[in] neighbor_mac_filter Optional MAC address to filter the neighbor links to be
* returned. A value of network_utils::ZERO_MAC means no filter has to be applied. A specific
* MAC address means that only links to that device must be included.
* @param[in, out] neighbor_links_map Map containing lists of neighbors grouped by the interface
* that connects to them.
*
* @return True on success and false otherwise.
*/
bool get_neighbor_links(const sMacAddr &neighbor_mac_filter,
std::map<std::string, std::vector<sMacAddr>> &neighbor_links_map);

/*
* State Machines
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
/* SPDX-License-Identifier: BSD-2-Clause-Patent
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It took me some time to understand that this is only for ethernet links. I guess I should know by heart that 802.3 is Ethernet, but consider changing the file name to eth_link_metrics_collector.cpp (nitpick)

*
* Copyright (c) 2020 MaxLinear
*
* This code is subject to the terms of the BSD+Patent license.
* See LICENSE file for more details.
*/

#include "ieee802_3_link_metrics_collector.h"

#include <bcl/network/network_utils.h>

#include <easylogging++.h>

#include <errno.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

// SPEED values
#include <linux/ethtool.h>

#define IFLIST_REPLY_BUFFER 8192

namespace beerocks {

/**
 * @brief Netlink request type.
 *
 * Wire layout of the request sent to the kernel over a NETLINK_ROUTE socket:
 * a standard Netlink message header immediately followed by an rtgenmsg
 * payload that selects the address family to dump (see rtnetlink(7)).
 */
struct nl_req_t {
/**
 * Netlink message header: total length, message type (e.g. RTM_GETLINK),
 * flags (e.g. NLM_F_REQUEST | NLM_F_DUMP), sequence number and port id.
 */
struct nlmsghdr hdr;

/**
 * "general form of address family dependent" message, i.e. how to tell which
 * address family (AF_*) we are interested in. */
struct rtgenmsg gen;
};

/**
* @brief Gets link metrics of an Ethernet network interface.
*
* Gets link metrics for given Ethernet network interface from the specified Netlink message of
* type RTM_NEWLINK.
*
* @param[in] h Pointer to the Netlink message containing the data.
* @param[in] local_interface_name Name of the Ethernet network interface.
* @param[in, out] link_metris Link metrics structure with read values.
*
* @return True on success and false otherwise.
*/
static bool get_link_metrics(const struct nlmsghdr *h, const std::string &local_interface_name,
sLinkMetrics &link_metrics)
{
bool result = false;

struct ifinfomsg *iface = static_cast<ifinfomsg *>(NLMSG_DATA(h));

size_t length = 0;
if (h->nlmsg_len > NLMSG_LENGTH(sizeof(*iface))) {
length = h->nlmsg_len - NLMSG_LENGTH(sizeof(*iface));
}

/**
* Loop over all attributes of the RTM_NEWLINK message
*/
for (struct rtattr *attribute = IFLA_RTA(iface); RTA_OK(attribute, length);
attribute = RTA_NEXT(attribute, length)) {
switch (attribute->rta_type) {
case IFLA_IFNAME:
/**
* This message contains the stats for the interface we are interested in
*/
if (0 == std::strncmp(local_interface_name.c_str(), (char *)RTA_DATA(attribute),
local_interface_name.length() + 1)) {
result = true;
}
break;
case IFLA_STATS:
if (result) {
struct rtnl_link_stats *stats = (struct rtnl_link_stats *)RTA_DATA(attribute);

/**
* Get interface speed into PHY rate.
*/
uint32_t phy_rate_mbps = UINT32_MAX;
beerocks::net::network_utils::linux_iface_get_speed(local_interface_name,
phy_rate_mbps);

link_metrics.transmitter.packet_errors = stats->tx_errors;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we have to do this right...

All the stats fields are uint32_t and wrap. tx_errors probably won't wrap, but the number of rx/tx packets will wrap if the device stays up for a couple of days.

Therefore, we have to keep the previously measured value and subtract it (wrapping subtraction, I'm not entirely sure what the standard C++ way of doing that is).

While we're at it, we should probably also keep the timestamp. In 1905.1 the time of the measurement isn't specified (which renders it completely useless), but in R2 there is a timestamp included. And to make the metrics somewhat useful even in R1, we should probably divide by the sampling time so the numbers are at least comparable with each other.

So, I think it would be better to keep an object for the link metrics for each interface.

That also means you can keep the netlink fd open. Minor optimisation.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry but I cannot see the timestamp field you mentioned. Maybe I'm not looking at the right document. Can you please clarify which are those R1 and R2 docs?
I agree with you that if we cannot include the time measurement window along with the counter, then it is useless to provide the number of packets/errors during such measurement period.

I think however that, even if we could provide timestamp or elapsed time since last measure, we have no way to know if the number of packets we read with the netlink socket have wrapped (and how many times) since last call to get link metrics. Although such wrapping is quite improbable to go unnoticed, given the frequency with which calls to get link metrics will be probably issued, we cannot guarantee that wrapping won't happen. Keeping timestamps is useless if they are not provided by the kernel.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry but I cannot see the timestamp field you mentioned.

I should have written "we should keep a timestamp".

we have no way to know if the number of packets we read with the netlink socket have wrapped (and how many times) since last call to get link metrics.

Stupid me again, I thought these were 64-bit numbers, but they're only 64-bit if you get them through netlink, not if you get them from IFLA_STATS64. Which apparently requires special handling because they're unaligned...

Which brings me to the question: why don't we use rtnl for this? E.g. https://gist.github.com/beejeebus/cc4fb07472cf5a0afd41 rtnl_link takes care of the 64-bit handling and fallback to 32-bit if the 64-bit stats are not available.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear: since the code is there now anyway, I would merge it as is. But when later we refactor with #900, I think we should also look into using rtnl_link instead of doing all the parsing manually. However, before going into that, we should check if the libnl-tiny used in openwrt also has it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Read in https://wireless.wiki.kernel.org/en/developers/documentation/nl80211

libnl tiny
OpenWrt folks created a tiny version of libnl based on a git snapshot, which only contains genl, not rtnetlink or any of the netfilter stuff, and compiles down to less than 30k in binary size. You can find it here:
https://git.openwrt.org/?p=openwrt/openwrt.git;a=tree;f=package/libs/libnl-tiny;hb=HEAD

link_metrics.transmitter.transmitted_packets = stats->tx_packets;
/**
* Note: The MAC throughput capacity is a function of the physical data rate and
* of the MAC overhead. We could somehow compute such overhead or, for simplicity,
* set the MAC throughput as a percentage of the physical data rate.
* For Ethernet, we can estimate the overhead: 7 bytes preamble, 1 byte SFD, 14
* bytes header, 4 bytes CRC and 12 bytes of interpacket gap on a 1500 byte
* payload. So 1500/1538.
* (see https://en.wikipedia.org/wiki/Ethernet_frame)
*/
const float layer2_payload_size = 1500;
const float layer1_total_size = 1538;
link_metrics.transmitter.mac_throughput_capacity_mbps =
phy_rate_mbps * (layer2_payload_size / layer1_total_size);
// Note: For simplicity, link availability is set to "100% of the time"
link_metrics.transmitter.link_availability = 100;
link_metrics.transmitter.phy_rate_mbps = phy_rate_mbps;

link_metrics.receiver.packet_errors = stats->rx_errors;
link_metrics.receiver.packets_received = stats->rx_packets;
link_metrics.receiver.rssi = UINT8_MAX;
}
break;
}
}

return result;
}

/**
* @brief Gets link metrics of an Ethernet network interface.
*
* Gets link metrics for given Ethernet network interface by sending a RTM_GETLINK Netlink request
* through the specified Netlink socket and parsing received response.
*
* @param[in] fd File descriptor of a connected Netlink socket.
* @param[in] local_interface_name Name of the Ethernet network interface.
* @param[in, out] link_metris Link metrics structure with read values.
*
* @return True on success and false otherwise.
*/
static bool get_link_metrics(int fd, const std::string &local_interface_name,
sLinkMetrics &link_metrics)
{
bool result = false;

struct sockaddr_nl socket; /* the remote (kernel space) side of the communication */

struct msghdr rtnl_msg {
}; /* generic msghdr struct for use with sendmsg */
struct iovec io {
}; /* IO vector for sendmsg */
struct nl_req_t req {
}; /* structure that describes the Netlink packet itself */

/**
* Netlink socket is ready for use, prepare and send request
*/
socket.nl_family = AF_NETLINK; /* fill-in kernel address (destination of our message) */

req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
req.hdr.nlmsg_type = RTM_GETLINK;
req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
req.hdr.nlmsg_seq = 1;
req.hdr.nlmsg_pid = 0;
req.gen.rtgen_family = AF_PACKET; /* no preferred AF, we will get *all* interfaces */

io.iov_base = &req;
io.iov_len = req.hdr.nlmsg_len;
rtnl_msg.msg_iov = &io;
rtnl_msg.msg_iovlen = 1;
rtnl_msg.msg_name = &socket;
rtnl_msg.msg_namelen = sizeof(socket);

if (sendmsg(fd, (struct msghdr *)&rtnl_msg, 0) < 0) {
LOG(ERROR) << "Unable to send message through Netlink socket: " << strerror(errno);
} else {
int msg_done = 0; /* flag to end loop parsing */

/**
* Parse reply until message is done
*/
while (!msg_done) {
int length;
struct nlmsghdr *msg_ptr; /* pointer to current message part */

struct msghdr rtnl_reply {
}; /* generic msghdr structure for use with recvmsg */

/* a large buffer to receive lots of link information */
char reply[IFLIST_REPLY_BUFFER];

io.iov_base = reply;
io.iov_len = IFLIST_REPLY_BUFFER;
rtnl_reply.msg_iov = &io;
rtnl_reply.msg_iovlen = 1;
rtnl_reply.msg_name = &socket;
rtnl_reply.msg_namelen = sizeof(socket);

/**
* Read as much data as fits in the receive buffer
*/
if ((length = recvmsg(fd, &rtnl_reply, 0)) != 0) {
for (msg_ptr = (struct nlmsghdr *)reply; NLMSG_OK(msg_ptr, length);
msg_ptr = NLMSG_NEXT(msg_ptr, length)) {
switch (msg_ptr->nlmsg_type) {
case NLMSG_DONE:
/**
* This is the special meaning NLMSG_DONE message we asked for by using NLM_F_DUMP flag
*/
msg_done = 1;
break;
case RTM_NEWLINK:
/**
* This is a RTM_NEWLINK message, which contains lots of information about a link
*/
if (get_link_metrics(msg_ptr, local_interface_name, link_metrics)) {
msg_done = 1;
result = true;
}
break;
}
}
}
}
}

return result;
}

/**
* @brief Gets link metrics of an Ethernet network interface.
*
* Gets link metrics for given Ethernet network by means of a Netlink socket using NETLINK_ROUTE
* protocol.
*
* @param[in] local_interface_name Name of the Ethernet network interface.
* @param[in, out] link_metris Link metrics structure with read values.
*
* @return True on success and false otherwise.
*/
static bool get_link_metrics(const std::string &local_interface_name, sLinkMetrics &link_metrics)
{
bool result = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed, just return true of false where needed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single vs. Multiple exit point, that's the question.
Have you agreed on which one to use or is it up to the programmer? I haven't seen anything about this topic in CODINGSTYLE.md

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in general we favour multiple exit points, it keeps the code indentation shorter and is what we have in most of prplmesh. I will update the CODYINGSTULE.md if agreed - @fvbogaert you have a chance for another poll :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use multiple exit points everywhere. Because normally, all cleanup is automatic since we use smart pointers etc.


/**
* Create Netlink socket for kernel/user-space communication.
* No need to call bind() as packets are sent only between the kernel and the originating
* process (no multicasting).
*/
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (fd < 0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get rid of unnecessary indentation:

Suggested change
if (fd < 0) {
if (fd < 0) {
LOG(ERROR) << "Failed creating Netlink socket: " << strerror(errno);
return false;
}

LOG(ERROR) << "Failed creating Netlink socket: " << strerror(errno);
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
} else {
/**
* Get link metrics using Netlink socket
*/
result = get_link_metrics(fd, local_interface_name, link_metrics);
/**
* Clean up and finish properly
*/
close(fd);
return true;

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result from get_link_metrics(fd, ...) is being ignored with your proposal

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we opt for multiple exit point we must call close twice.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're going for single exit point then we should go all the way and do it like in the kernel, with goto. @fvbogaert should we use goto? Another poll..

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's my preferred pattern if there is any cleanup to do. However, we really should use automatic cleanup everywhere. So the better approach would be to wrap the file descriptor so it gets closed automatically by the destructor.

/**
* Get link metrics using Netlink socket
*/
result = get_link_metrics(fd, local_interface_name, link_metrics);

/**
* Clean up and finish properly
*/
close(fd);
}

return result;
}

ieee802_3_link_metrics_collector::~ieee802_3_link_metrics_collector() = default;

bool ieee802_3_link_metrics_collector::get_link_metrics(
    const std::string &local_interface_name,
    [[gnu::unused]] const sMacAddr &neighbor_interface_address, sLinkMetrics &link_metrics)
{
    /**
     * Linux exposes Ethernet stats per interface, not per link, so the neighbor
     * address is ignored and the file-local overload is used instead.
     */
    return beerocks::get_link_metrics(local_interface_name, link_metrics);
}

} // namespace beerocks
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* SPDX-License-Identifier: BSD-2-Clause-Patent
*
* Copyright (c) 2020 MaxLinear
*
* This code is subject to the terms of the BSD+Patent license.
* See LICENSE file for more details.
*/
#ifndef __IEEE802_3_LINK_METRICS_COLLECTOR_H__
#define __IEEE802_3_LINK_METRICS_COLLECTOR_H__

#include "link_metrics.h"

namespace beerocks {

class ieee802_3_link_metrics_collector : public link_metrics_collector {

public:
virtual ~ieee802_3_link_metrics_collector();

/**
* @brief Gets link metrics information.
*
* Gets link metrics associated to the link between given local interface and a neighbor's
* interface whose MAC address is 'neighbor_interface_address'.
*
* Note that metrics are associated to a link and not to an interface. For Ethernet interfaces
* and in Linux though, it is not possible to obtain per link stats: in Linux is easy to check
* how many packets were received by "eth0" *in total*, but it is not trivial to find out how
* many packets were received by "eth0" *from each neighbor*. For the sake of simplicity this
* implementation just reports the overall per-interface stats (thus ignoring the
* 'neighbor_interface_address' parameter).
*
* @param[in] local_interface_name Name of the local interface.
* @param[in] neighbor_interface_address MAC address at the other end of the link (this MAC
* address belongs to a neighbor's interface.
* @param[out] link_metrics Link metrics information.
*
* @return True on success and false otherwise.
*/
virtual bool get_link_metrics(const std::string &local_interface_name,
const sMacAddr &neighbor_interface_address,
sLinkMetrics &link_metrics) override;
};

} // namespace beerocks

#endif
Loading