Skip to content

Commit

Permalink
ospfd: Improve OSPF neighbor retransmission list granularity and prec…
Browse files Browse the repository at this point in the history
…ision

The current OSPF neighbor retransmission operates on a single per-neighbor
periodic timer that sends all LSAs on the list when it expires.
Additionally, it skips the first retransmission of received LSAs so that
at least the retransmission interval has elapsed, resulting in a delay of
between the retransmission interval and twice the interval. In environments
where the links are lossy on P2MP networks with "delay-reflood" configured
(which relies on neighbor retransmission in partial meshes), the
implementation is sub-optimal (to say the least).

This commit reimplements OSPF neighbor retransmission as follows:

   1. A new data structure making use of the application-managed
      linklist.c library functions implements an OSPF temporal
      list where each node includes a timestamp.
   2. The existing neighbor LS retransmission LSDB data structure
      is augmented with a pointer to the list node on the temporal
      list to facilitate O(1) removal when the LSA is acknowledged.
   3. The neighbor LS retransmission timer is set to the expiration
      timer of the LSA at the top of the list.
   4. When the timer expires, LSAs whose retransmission time falls within
      the window of the current time plus a small delta (20 milliseconds)
      are retransmitted. The LSAs that are retransmitted are given an
      updated retransmission time and moved to the end of the list.
   5. Neighbor and interface LSA retransmission counters are added
      to provide insight into the lossiness of the links. However,
      these will increment quickly on non-fully meshed P2MP networks
      with "delay-reflood" configured.
   6. Added a topotest to exercise the implementation on a non-fully
      meshed P2MP network with "delay-reflood" configured. The
      alternative was to use existing mechanisms to introduce loss,
      but these seem less deterministic in a topotest.

Signed-off-by: Acee Lindem <[email protected]>
  • Loading branch information
aceelindem committed Jun 1, 2024
1 parent db1e2a0 commit 829f4df
Show file tree
Hide file tree
Showing 18 changed files with 418 additions and 93 deletions.
137 changes: 114 additions & 23 deletions ospfd/ospf_flood.c
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ void ospf_ls_request_delete(struct ospf_neighbor *nbr, struct ospf_lsa *lsa)
ospf_lsdb_delete(&nbr->ls_req, lsa);
}

/* Remove all LSA from neighbor's ls-requenst list. */
/* Remove all LSAs from neighbor's ls-request list. */
void ospf_ls_request_delete_all(struct ospf_neighbor *nbr)
{
ospf_lsa_unlock(&nbr->ls_req_last);
Expand Down Expand Up @@ -1061,59 +1061,105 @@ int ospf_ls_retransmit_isempty(struct ospf_neighbor *nbr)
/* Add LSA to be retransmitted to neighbor's ls-retransmit list. */
void ospf_ls_retransmit_add(struct ospf_neighbor *nbr, struct ospf_lsa *lsa)
{
struct ospf_lsa *old;
struct ospf_lsdb_linked_node *ls_rxmt_node;
struct ospf_temporal_listnode *ls_rxmt_listnode;
struct ospf_lsa *old = NULL;

old = ospf_ls_retransmit_lookup(nbr, lsa);
ls_rxmt_node = ospf_lsdb_linked_lookup(&nbr->ls_rxmt, lsa);
if (ls_rxmt_node)
old = ls_rxmt_node->info;

if (ospf_lsa_more_recent(old, lsa) < 0) {
if (old) {
old->retransmit_counter--;
list_delete_node(nbr->ls_rxmt_list,
&ls_rxmt_node->temporal_listnode
->listnode);
XFREE(MTYPE_OSPF_TEMPORAL_LIST,
ls_rxmt_node->temporal_listnode);
ospf_lsdb_delete(&nbr->ls_rxmt, old);
if (IS_DEBUG_OSPF(lsa, LSA_FLOODING))
zlog_debug("RXmtL(%lu)--, NBR(%pI4(%s)), LSA[%s]",
zlog_debug("RXmtL(%lu) NBR(%pI4(%s)) Old Delete LSA[%s] on Add",
ospf_ls_retransmit_count(nbr),
&nbr->router_id,
ospf_get_name(nbr->oi->ospf),
dump_lsa_key(old));
ospf_lsdb_delete(&nbr->ls_rxmt, old);
dump_lsa_key(lsa));
ospf_lsa_unlock(&old);
}
lsa->retransmit_counter++;
ls_rxmt_listnode =
XCALLOC(MTYPE_OSPF_TEMPORAL_LIST,
sizeof(struct ospf_temporal_listnode));
/*
* Set the LSA retransmission time for the neighbor;
*/
monotime(&ls_rxmt_listnode->listnode_time);
ls_rxmt_listnode->listnode_time.tv_sec += nbr->v_ls_rxmt;

/*
* Add the LSA to the neighbor retransmission list managing
* list node memory locally.
*/
ls_rxmt_listnode->listnode.data = ospf_lsa_lock(lsa);
listnode_add(nbr->ls_rxmt_list, ls_rxmt_listnode);
ospf_lsdb_add(&nbr->ls_rxmt, lsa);

/*
* We cannot make use of the newly introduced callback function
* "lsdb->new_lsa_hook" to replace debug output below, just
* because
* it seems no simple and smart way to pass neighbor information
* to
* the common function "ospf_lsdb_add()" -- endo.
* Look up the newly added node and set the list pointer.
*/
ls_rxmt_node = ospf_lsdb_linked_lookup(&nbr->ls_rxmt, lsa);
ls_rxmt_node->temporal_listnode = ls_rxmt_listnode;

if (IS_DEBUG_OSPF(lsa, LSA_FLOODING))
zlog_debug("RXmtL(%lu)++, NBR(%pI4(%s)), LSA[%s]",
zlog_debug("RXmtL(%lu) NBR(%pI4(%s)) Add LSA[%s] retrans at (%ld/%ld)",
ospf_ls_retransmit_count(nbr),
&nbr->router_id,
ospf_get_name(nbr->oi->ospf),
dump_lsa_key(lsa));
ospf_lsdb_add(&nbr->ls_rxmt, lsa);
&nbr->router_id, ospf_get_name(nbr->oi->ospf),
dump_lsa_key(lsa),
(long) ls_rxmt_listnode->listnode_time.tv_sec,
(long) ls_rxmt_listnode->listnode_time.tv_usec);
/*
* Assure the neighbor LSA retransmission timer is set.
*/
if (!nbr->t_ls_rxmt)
ospf_ls_retransmit_set_timer(nbr);
}
}

/* Remove LSA from neighbor's ls-retransmit list. */
void ospf_ls_retransmit_delete(struct ospf_neighbor *nbr, struct ospf_lsa *lsa)
{
if (ospf_ls_retransmit_lookup(nbr, lsa)) {
struct ospf_lsdb_linked_node *ls_rxmt_node;

ls_rxmt_node = ospf_lsdb_linked_lookup(&nbr->ls_rxmt, lsa);

if (ls_rxmt_node) {
lsa->retransmit_counter--;
if (IS_DEBUG_OSPF(lsa, LSA_FLOODING)) /* -- endo. */
zlog_debug("RXmtL(%lu)--, NBR(%pI4(%s)), LSA[%s]",
list_delete_node(nbr->ls_rxmt_list,
&ls_rxmt_node->temporal_listnode->listnode);
XFREE(MTYPE_OSPF_TEMPORAL_LIST, ls_rxmt_node->temporal_listnode);
ospf_lsdb_delete(&nbr->ls_rxmt, lsa);
if (IS_DEBUG_OSPF(lsa, LSA_FLOODING))
zlog_debug("RXmtL(%lu) NBR(%pI4(%s)) Delete LSA[%s]",
ospf_ls_retransmit_count(nbr),
&nbr->router_id,
ospf_get_name(nbr->oi->ospf),
&nbr->router_id, ospf_get_name(nbr->oi->ospf),
dump_lsa_key(lsa));
ospf_lsdb_delete(&nbr->ls_rxmt, lsa);
ospf_lsa_unlock(&lsa);

/*
* If the LS retransmission list is empty, reset the
* neighbor's timer.
*/
if (ospf_ls_retransmit_isempty(nbr))
EVENT_OFF(nbr->t_ls_rxmt);
}
}

/* Clear neighbor's ls-retransmit list. */
void ospf_ls_retransmit_clear(struct ospf_neighbor *nbr)
{
struct listnode *node, *next_node;
struct ospf_lsdb *lsdb;
struct ospf_lsa *lsa;
int i;

lsdb = &nbr->ls_rxmt;
Expand All @@ -1128,10 +1174,55 @@ void ospf_ls_retransmit_clear(struct ospf_neighbor *nbr)
ospf_ls_retransmit_delete(nbr, lsa);
}

for (ALL_LIST_ELEMENTS(nbr->ls_rxmt_list, node, next_node, lsa)) {
list_delete_node(nbr->ls_rxmt_list, node);
XFREE(MTYPE_OSPF_TEMPORAL_LIST, node);
ospf_lsa_unlock(&lsa);
}

ospf_lsa_unlock(&nbr->ls_req_last);
nbr->ls_req_last = NULL;
}

/*
 * (Re)arm the neighbor's LS retransmission timer from the entry at the
 * head of the neighbor's retransmission list.
 *
 * Any pending timer is cancelled first. When the list is empty no new
 * timer is scheduled. Otherwise the timer is set to the time remaining
 * until the head entry's retransmission time, or to a short fixed delay
 * when that time has already been reached.
 *
 * nbr - neighbor whose t_ls_rxmt timer is being (re)scheduled.
 */
void ospf_ls_retransmit_set_timer(struct ospf_neighbor *nbr)
{
	struct ospf_temporal_listnode *ls_rxmt_listnode;
	struct timeval current_time, delay;
	unsigned long delay_milliseconds;
	struct ospf_lsa *lsa;

	/* Cancel unconditionally, as the other EVENT_OFF() callers do. */
	EVENT_OFF(nbr->t_ls_rxmt);

	/*
	 * The list stores application-managed nodes, so the head listnode
	 * is the leading member of an ospf_temporal_listnode.
	 */
	ls_rxmt_listnode =
		(struct ospf_temporal_listnode *)listhead(nbr->ls_rxmt_list);
	if (!ls_rxmt_listnode)
		return;

	lsa = ls_rxmt_listnode->listnode.data;

	monotime(&current_time);
	if (timercmp(&current_time, &ls_rxmt_listnode->listnode_time, >=)) {
		/* Already due: fire after a minimal 10 msec delay. */
		delay_milliseconds = 10;
	} else {
		timersub(&ls_rxmt_listnode->listnode_time, &current_time,
			 &delay);
		delay_milliseconds = (delay.tv_sec * 1000) +
				     (delay.tv_usec / 1000);
	}

	event_add_timer_msec(master, ospf_ls_rxmt_timer, nbr,
			     delay_milliseconds, &nbr->t_ls_rxmt);

	/* delay_milliseconds is unsigned long, hence %lu rather than %ld. */
	if (IS_DEBUG_OSPF(lsa, LSA_FLOODING))
		zlog_debug("RXmtL(%lu) NBR(%pI4(%s)) retrans timer set in %lu msecs - Head LSA(%s)",
			   ospf_ls_retransmit_count(nbr), &nbr->router_id,
			   ospf_get_name(nbr->oi->ospf), delay_milliseconds,
			   dump_lsa_key(lsa));
}

/* Lookup LSA from neighbor's ls-retransmit list. */
struct ospf_lsa *ospf_ls_retransmit_lookup(struct ospf_neighbor *nbr,
struct ospf_lsa *lsa)
Expand Down
16 changes: 15 additions & 1 deletion ospfd/ospf_flood.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
#ifndef _ZEBRA_OSPF_FLOOD_H
#define _ZEBRA_OSPF_FLOOD_H

/*
 * OSPF Temporal list - Used for Link State Retransmission List Nodes.
 *
 * A list node that carries a timestamp alongside the generic list linkage.
 */
struct ospf_temporal_listnode {
/*
 * Generic list linkage. NOTE(review): ospf_ls_retransmit_set_timer()
 * casts the list head (a struct listnode *) to this type, so this
 * presumably has to remain the first member - confirm before reordering.
 */
struct listnode listnode;

/*
 * Time associated with this list node. For example, for a neighbor
 * link retransmission list, this is the retransmission time.
 */
struct timeval listnode_time;
};

extern int ospf_flood(struct ospf *, struct ospf_neighbor *, struct ospf_lsa *,
struct ospf_lsa *);
extern int ospf_flood_through(struct ospf *, struct ospf_neighbor *,
Expand All @@ -28,14 +41,15 @@ extern void ospf_ls_request_delete(struct ospf_neighbor *, struct ospf_lsa *);
extern void ospf_ls_request_delete_all(struct ospf_neighbor *);
extern struct ospf_lsa *ospf_ls_request_lookup(struct ospf_neighbor *,
struct ospf_lsa *);

extern unsigned long ospf_ls_retransmit_count(struct ospf_neighbor *);
extern unsigned long ospf_ls_retransmit_count_self(struct ospf_neighbor *, int);
extern int ospf_ls_retransmit_isempty(struct ospf_neighbor *);
extern void ospf_ls_retransmit_add(struct ospf_neighbor *, struct ospf_lsa *);
extern void ospf_ls_retransmit_delete(struct ospf_neighbor *,
struct ospf_lsa *);
extern void ospf_ls_retransmit_clear(struct ospf_neighbor *);
extern void ospf_ls_retransmit_set_timer(struct ospf_neighbor *nbr);

extern struct ospf_lsa *ospf_ls_retransmit_lookup(struct ospf_neighbor *,
struct ospf_lsa *);
extern void ospf_ls_retransmit_delete_nbr_area(struct ospf_area *,
Expand Down
1 change: 1 addition & 0 deletions ospfd/ospf_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ struct ospf_interface {
uint32_t ls_ack_out; /* LS Ack message output count. */
uint32_t discarded; /* discarded input count by error. */
uint32_t state_change; /* Number of status change. */
uint32_t ls_rxmt_lsa; /* Number of LSAs retransmitted. */

uint32_t full_nbrs;

Expand Down
53 changes: 53 additions & 0 deletions ospfd/ospf_lsdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,59 @@ void ospf_lsdb_init(struct ospf_lsdb *lsdb)
lsdb->type[i].db = route_table_init();
}

/*
 * Route table delegate "create_node" callback for linked LSDBs.
 *
 * Allocates an ospf_lsdb_linked_node with XCALLOC and returns it as a
 * generic route node. Neither the delegate nor the table argument is used.
 */
static struct route_node *
ospf_lsdb_linked_node_create(route_table_delegate_t *delegate,
			     struct route_table *table)
{
	struct ospf_lsdb_linked_node *linked_node =
		XCALLOC(MTYPE_OSPF_LSDB_NODE, sizeof(*linked_node));

	return (struct route_node *)linked_node;
}

/*
 * Route table delegate "destroy_node" callback for linked LSDBs.
 *
 * Releases a node that was obtained from ospf_lsdb_linked_node_create().
 * Neither the delegate nor the table argument is used.
 */
static void ospf_lsdb_linked_node_destroy(route_table_delegate_t *delegate,
					  struct route_table *table,
					  struct route_node *node)
{
	struct ospf_lsdb_linked_node *linked_node;

	/* Free through the concrete node type it was allocated as. */
	linked_node = (struct ospf_lsdb_linked_node *)node;
	XFREE(MTYPE_OSPF_LSDB_NODE, linked_node);
}

/*
 * Route table delegate that makes a table allocate and free its nodes as
 * struct ospf_lsdb_linked_node; passed to route_table_init_with_delegate()
 * by ospf_lsdb_linked_init().
 */
static route_table_delegate_t ospf_lsdb_linked_table_delegate = {
.create_node = ospf_lsdb_linked_node_create,
.destroy_node = ospf_lsdb_linked_node_destroy,
};

/*
 * Initialize an LSDB whose per-LSA-type route tables are created with the
 * linked-node delegate, for every type in [OSPF_MIN_LSA, OSPF_MAX_LSA).
 */
void ospf_lsdb_linked_init(struct ospf_lsdb *lsdb)
{
	int lsa_type = OSPF_MIN_LSA;

	while (lsa_type < OSPF_MAX_LSA) {
		lsdb->type[lsa_type].db = route_table_init_with_delegate(
			&ospf_lsdb_linked_table_delegate);
		lsa_type++;
	}
}

struct ospf_lsdb_linked_node *ospf_lsdb_linked_lookup(struct ospf_lsdb *lsdb,
struct ospf_lsa *lsa)
{
struct ospf_lsdb_linked_node *lsdb_linked_node;
struct route_table *table;
struct prefix_ls lp;

table = lsdb->type[lsa->data->type].db;
ls_prefix_set(&lp, lsa);
lsdb_linked_node = (struct ospf_lsdb_linked_node *)
route_node_lookup(table, (struct prefix *)&lp);
if (lsdb_linked_node)
route_unlock_node((struct route_node *)lsdb_linked_node);

return lsdb_linked_node;
}

void ospf_lsdb_free(struct ospf_lsdb *lsdb)
{
ospf_lsdb_cleanup(lsdb);
Expand Down
23 changes: 23 additions & 0 deletions ospfd/ospf_lsdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#ifndef _ZEBRA_OSPF_LSDB_H
#define _ZEBRA_OSPF_LSDB_H

#include "prefix.h"
#include "table.h"

/* OSPF LSDB structure. */
struct ospf_lsdb {
struct {
Expand Down Expand Up @@ -43,9 +46,29 @@ struct ospf_lsdb {
#define AREA_LSDB(A,T) ((A)->lsdb->type[(T)].db)
#define AS_LSDB(O,T) ((O)->lsdb->type[(T)].db)

/*
 * Alternate route node structure for LSDB nodes linked to
 * list elements.
 *
 * Nodes of this type are created and destroyed by the linked-table
 * delegate callbacks and are cast to and from struct route_node.
 */
struct ospf_lsdb_linked_node {
/*
 * Caution these must be the very first fields
 * (the node is handed to the route table code as a struct route_node).
 */
ROUTE_NODE_FIELDS

/*
 * List node on a temporal list, e.g., a neighbor
 * retransmission list.
 */
struct ospf_temporal_listnode *temporal_listnode;
};

/* OSPF LSDB related functions. */
extern struct ospf_lsdb *ospf_lsdb_new(void);
extern void ospf_lsdb_init(struct ospf_lsdb *);
extern void ospf_lsdb_linked_init(struct ospf_lsdb *lsdb);
extern struct ospf_lsdb_linked_node *
ospf_lsdb_linked_lookup(struct ospf_lsdb *lsdb, struct ospf_lsa *lsa);
extern void ospf_lsdb_free(struct ospf_lsdb *);
extern void ospf_lsdb_cleanup(struct ospf_lsdb *);
extern void ls_prefix_set(struct prefix_ls *lp, struct ospf_lsa *lsa);
Expand Down
2 changes: 2 additions & 0 deletions ospfd/ospf_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ DEFINE_MTYPE(OSPFD, OSPF_GR_HELPER, "OSPF Graceful Restart Helper");
DEFINE_MTYPE(OSPFD, OSPF_EXTERNAL_RT_AGGR, "OSPF External Route Summarisation");
DEFINE_MTYPE(OSPFD, OSPF_P_SPACE, "OSPF TI-LFA P-Space");
DEFINE_MTYPE(OSPFD, OSPF_Q_SPACE, "OSPF TI-LFA Q-Space");
DEFINE_MTYPE(OSPFD, OSPF_TEMPORAL_LIST, "OSPF Temporal List");
DEFINE_MTYPE(OSPFD, OSPF_LSDB_NODE, "OSPF LSDB Linked Node");
2 changes: 2 additions & 0 deletions ospfd/ospf_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,7 @@ DECLARE_MTYPE(OSPF_GR_HELPER);
DECLARE_MTYPE(OSPF_EXTERNAL_RT_AGGR);
DECLARE_MTYPE(OSPF_P_SPACE);
DECLARE_MTYPE(OSPF_Q_SPACE);
DECLARE_MTYPE(OSPF_TEMPORAL_LIST);
DECLARE_MTYPE(OSPF_LSDB_NODE);

#endif /* _QUAGGA_OSPF_MEMORY_H */
9 changes: 6 additions & 3 deletions ospfd/ospf_neighbor.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ struct ospf_neighbor *ospf_nbr_new(struct ospf_interface *oi)
nbr->v_inactivity = OSPF_IF_PARAM(oi, v_wait);
nbr->v_db_desc = OSPF_IF_PARAM(oi, retransmit_interval);
nbr->v_ls_req = OSPF_IF_PARAM(oi, retransmit_interval);
nbr->v_ls_upd = OSPF_IF_PARAM(oi, retransmit_interval);
nbr->v_ls_rxmt = OSPF_IF_PARAM(oi, retransmit_interval);
nbr->priority = -1;

/* DD flags. */
Expand All @@ -80,8 +80,10 @@ struct ospf_neighbor *ospf_nbr_new(struct ospf_interface *oi)
nbr->nbr_nbma = NULL;

ospf_lsdb_init(&nbr->db_sum);
ospf_lsdb_init(&nbr->ls_rxmt);
ospf_lsdb_linked_init(&nbr->ls_rxmt);
ospf_lsdb_init(&nbr->ls_req);
nbr->ls_rxmt_list = list_new();
listset_app_node_mem(nbr->ls_rxmt_list);

nbr->crypt_seqnum = 0;

Expand Down Expand Up @@ -109,6 +111,7 @@ void ospf_nbr_free(struct ospf_neighbor *nbr)
/* Free retransmit list. */
if (ospf_ls_retransmit_count(nbr))
ospf_ls_retransmit_clear(nbr);
list_delete(&nbr->ls_rxmt_list);

/* Cleanup LSDBs. */
ospf_lsdb_cleanup(&nbr->db_sum);
Expand All @@ -128,7 +131,7 @@ void ospf_nbr_free(struct ospf_neighbor *nbr)
EVENT_OFF(nbr->t_inactivity);
EVENT_OFF(nbr->t_db_desc);
EVENT_OFF(nbr->t_ls_req);
EVENT_OFF(nbr->t_ls_upd);
EVENT_OFF(nbr->t_ls_rxmt);

/* Cancel all events. */ /* Thread lookup cost would be negligible. */
event_cancel_event(master, nbr);
Expand Down
Loading

0 comments on commit 829f4df

Please sign in to comment.