Implement https://www.rfc-editor.org/rfc/rfc8654.txt
```
> | jq '."192.168.10.25".neighborCapabilities.extendedMessage'
"advertisedAndReceived"
```
The other side is BIRD:
```
BIRD 2.0.7 ready.
Name Proto Table State Since Info
v4 BGP --- up 19:39:15.689 Established
BGP state: Established
Neighbor address: 192.168.10.123
Neighbor AS: 65534
Local AS: 65025
Neighbor ID: 192.168.100.1
Local capabilities
Multiprotocol
AF announced: ipv4
Route refresh
Extended message
Graceful restart
4-octet AS numbers
Enhanced refresh
Long-lived graceful restart
Neighbor capabilities
Multiprotocol
AF announced: ipv4
Route refresh
Extended message
Graceful restart
4-octet AS numbers
ADD-PATH
RX: ipv4
TX:
Enhanced refresh
Session: external AS4
Source address: 192.168.10.25
Hold timer: 140.139/180
Keepalive timer: 9.484/60
Channel ipv4
State: UP
Table: master4
Preference: 100
Input filter: ACCEPT
Output filter: ACCEPT
Routes: 9 imported, 3 exported, 8 preferred
Route change stats: received rejected filtered ignored accepted
Import updates: 9 0 0 0 9
Import withdraws: 2 0 --- 2 0
Export updates: 11 8 0 --- 3
Export withdraws: 0 --- --- --- 0
BGP Next hop: 192.168.10.25
```
Also tested against the following peers to make sure backward compatibility is preserved:
ExaBGP 4.0.2-1c737d99
Arista vEOS 4.21.14M
Tested by injecting 10k routes with:
```
sharp install routes 172.16.0.1 nexthop 192.168.10.123 10000
```
Before extended message support:
```
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:51 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 4096 (max message len: 4096) numpfx 809
2021/03/01 07:18:52 BGP: u1:s1 send UPDATE len 2186 (max message len: 4096) numpfx 427
2021/03/01 07:18:53 BGP: u1:s1 send UPDATE len 3421 (max message len: 4096) numpfx 674
```
After extended message support:
```
2021/03/01 07:20:11 BGP: u1:s1 send UPDATE len 50051 (max message len: 65535) numpfx 10000
```
Signed-off-by: Donatas Abraitis <donatas.abraitis@gmail.com>
* a stack buffer, since they perform bounds checking
* and we are working with untrusted data.
*/
- unsigned char ndata[BGP_MAX_PACKET_SIZE];
+ unsigned char ndata[peer->max_packet_size];
memset(ndata, 0x00, sizeof(ndata));
size_t lfl =
CHECK_FLAG(flag, BGP_ATTR_FLAG_EXTLEN) ? 2 : 1;
pktsize = ntohs(pktsize);
/* if this fails we are seriously screwed */
- assert(pktsize <= BGP_MAX_PACKET_SIZE);
+ assert(pktsize <= peer->max_packet_size);
/*
* If we have that much data, chuck it into its own
/* wipe buffer just in case someone screwed up */
ringbuf_wipe(peer->ibuf_work);
} else {
- assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
+ assert(ringbuf_space(peer->ibuf_work) >= peer->max_packet_size);
thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
&peer->t_read);
size_t readsize; // how many bytes we want to read
ssize_t nbytes; // how many bytes we actually read
uint16_t status = 0;
- static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
+ uint8_t ibw[peer->max_packet_size * BGP_READ_PACKET_MAX];
readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
nbytes = read(peer->fd, ibw, readsize);
}
/* Minimum packet length check. */
- if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
+ if ((size < BGP_HEADER_SIZE) || (size > peer->max_packet_size)
|| (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
|| (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
|| (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
return as4;
}
+/*
+ * Handle a BGP Extended Message capability (RFC 8654) received from a peer.
+ *
+ * The capability is defined with a zero-length value
+ * (CAPABILITY_CODE_EXT_MESSAGE_LEN), so any other length in the capability
+ * header is logged as an EC_BGP_PKT_OPEN error and rejected.
+ *
+ * peer: peer whose capability flags are updated.
+ * hdr:  capability header whose length field is validated.
+ *
+ * Returns 0 after setting PEER_CAP_EXTENDED_MESSAGE_RCV in peer->cap,
+ * or -1 when the advertised length is wrong (the flag is left untouched).
+ */
+static int bgp_capability_ext_message(struct peer *peer,
+ struct capability_header *hdr)
+{
+ /* Reject a capability whose length differs from the expected one. */
+ if (hdr->length != CAPABILITY_CODE_EXT_MESSAGE_LEN) {
+ flog_err(
+ EC_BGP_PKT_OPEN,
+ "%s: BGP Extended Message capability has incorrect data length %d",
+ peer->host, hdr->length);
+ return -1;
+ }
+
+ /* Record that the peer announced the capability. */
+ SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV);
+
+ return 0;
+}
+
static int bgp_capability_addpath(struct peer *peer,
struct capability_header *hdr)
{
{CAPABILITY_CODE_ORF_OLD, "ORF (Old)"},
{CAPABILITY_CODE_FQDN, "FQDN"},
{CAPABILITY_CODE_ENHANCED_RR, "Enhanced Route Refresh"},
+ {CAPABILITY_CODE_EXT_MESSAGE, "BGP Extended Message"},
{0}};
/* Minimum sizes for length field of each cap (so not inc. the header) */
[CAPABILITY_CODE_ORF_OLD] = CAPABILITY_CODE_ORF_LEN,
[CAPABILITY_CODE_FQDN] = CAPABILITY_CODE_MIN_FQDN_LEN,
[CAPABILITY_CODE_ENHANCED_RR] = CAPABILITY_CODE_ENHANCED_LEN,
+ [CAPABILITY_CODE_EXT_MESSAGE] = CAPABILITY_CODE_EXT_MESSAGE_LEN,
};
/* value the capability must be a multiple of.
[CAPABILITY_CODE_ORF_OLD] = 1,
[CAPABILITY_CODE_FQDN] = 1,
[CAPABILITY_CODE_ENHANCED_RR] = 1,
+ [CAPABILITY_CODE_EXT_MESSAGE] = 1,
};
/**
case CAPABILITY_CODE_ENHE:
case CAPABILITY_CODE_FQDN:
case CAPABILITY_CODE_ENHANCED_RR:
+ case CAPABILITY_CODE_EXT_MESSAGE:
/* Check length. */
if (caphdr.length < cap_minsizes[caphdr.code]) {
zlog_info(
case CAPABILITY_CODE_ENHE:
ret = bgp_capability_enhe(peer, &caphdr);
break;
+ case CAPABILITY_CODE_EXT_MESSAGE:
+ ret = bgp_capability_ext_message(peer, &caphdr);
+ break;
case CAPABILITY_CODE_FQDN:
ret = bgp_capability_hostname(peer, &caphdr);
break;
}
}
+ /* Extended Message Support */
+ peer->max_packet_size =
+ CHECK_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_RCV)
+ ? BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
+ : BGP_MAX_PACKET_SIZE;
+
/* Check there are no common AFI/SAFIs and send Unsupported Capability
error. */
if (*mp_capability
local_as = peer->local_as;
stream_putl(s, local_as);
+ /* Extended Message Support */
+ SET_FLAG(peer->cap, PEER_CAP_EXTENDED_MESSAGE_ADV);
+ stream_putc(s, BGP_OPEN_OPT_CAP);
+ stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN + 2);
+ stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE);
+ stream_putc(s, CAPABILITY_CODE_EXT_MESSAGE_LEN);
+
/* AddPath */
FOREACH_AFI_SAFI (afi, safi) {
if (peer->afc[afi][safi]) {
#define CAPABILITY_CODE_ENHE 5 /* Extended Next Hop Encoding */
#define CAPABILITY_CODE_REFRESH_OLD 128 /* Route Refresh Capability(cisco) */
#define CAPABILITY_CODE_ORF_OLD 130 /* Cooperative Route Filtering Capability(cisco) */
+#define CAPABILITY_CODE_EXT_MESSAGE 6 /* Extended Message Support */
/* Capability Length */
#define CAPABILITY_CODE_MP_LEN 4
#define CAPABILITY_CODE_MIN_FQDN_LEN 2
#define CAPABILITY_CODE_ENHANCED_LEN 0
#define CAPABILITY_CODE_ORF_LEN 5
+#define CAPABILITY_CODE_EXT_MESSAGE_LEN 0 /* Extended Message Support */
/* Cooperative Route Filtering Capability. */
zlog_debug("send End-of-RIB for %s to %s",
get_afi_safi_str(afi, safi, false), peer->host);
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);
/* ============================================== */
/* Allocate new stream. */
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make notify packet. */
bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
/* Convert AFI, SAFI to values for packet. */
bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
/* Convert AFI, SAFI to values for packet. */
bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
subgrp->uptime = bgp_clock();
}
-static void sync_init(struct update_subgroup *subgrp)
+static void sync_init(struct update_subgroup *subgrp,
+ struct update_group *updgrp)
{
+ struct peer *peer = UPDGRP_PEER(updgrp);
+
subgrp->sync =
XCALLOC(MTYPE_BGP_SYNCHRONISE, sizeof(struct bgp_synchronize));
bgp_adv_fifo_init(&subgrp->sync->update);
/* We use a larger buffer for subgrp->work in the event that:
* - We RX a BGP_UPDATE where the attributes alone are just
- * under BGP_MAX_PACKET_SIZE
+ * under 4096 or 65535 (if Extended Message capability negotiated).
* - The user configures an outbound route-map that does many as-path
* prepends or adds many communities. At most they can have
* CMD_ARGC_MAX
* bounds
* checking for every single attribute as we construct an UPDATE.
*/
- subgrp->work =
- stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW);
- subgrp->scratch = stream_new(BGP_MAX_PACKET_SIZE);
+ subgrp->work = stream_new(peer->max_packet_size
+ + BGP_MAX_PACKET_SIZE_OVERFLOW);
+ subgrp->scratch = stream_new(peer->max_packet_size);
}
static void sync_delete(struct update_subgroup *subgrp)
dst->flags = src->flags;
dst->af_flags[afi][safi] = src->af_flags[afi][safi];
dst->pmax_out[afi][safi] = src->pmax_out[afi][safi];
+ dst->max_packet_size = src->max_packet_size;
XFREE(MTYPE_BGP_PEER_HOST, dst->host);
dst->host = XSTRDUP(MTYPE_BGP_PEER_HOST, src->host);
subgrp = XCALLOC(MTYPE_BGP_UPD_SUBGRP, sizeof(struct update_subgroup));
update_subgroup_checkin(subgrp, updgrp);
subgrp->v_coalesce = (UPDGRP_INST(updgrp))->coalesce_time;
- sync_init(subgrp);
+ sync_init(subgrp, updgrp);
bpacket_queue_init(SUBGRP_PKTQ(subgrp));
bpacket_queue_add(SUBGRP_PKTQ(subgrp), NULL, NULL);
TAILQ_INIT(&(subgrp->adjq));
packet = stream_dup(s);
bgp_packet_set_size(packet);
if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
- zlog_debug("u%" PRIu64 ":s%" PRIu64" send UPDATE len %zd numpfx %d",
- subgrp->update_group->id, subgrp->id,
- (stream_get_endp(packet)
- - stream_get_getp(packet)),
- num_pfx);
+ zlog_debug(
+ "u%" PRIu64 ":s%" PRIu64
+ " send UPDATE len %zd (max message len: %hu) numpfx %d",
+ subgrp->update_group->id, subgrp->id,
+ (stream_get_endp(packet)
+ - stream_get_getp(packet)),
+ peer->max_packet_size, num_pfx);
pkt = bpacket_queue_add(SUBGRP_PKTQ(subgrp), packet, &vecarr);
stream_reset(s);
stream_reset(snlri);
tx_id_buf, attrstr);
}
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);
tx_id_buf);
}
- s = stream_new(BGP_MAX_PACKET_SIZE);
+ s = stream_new(peer->max_packet_size);
/* Make BGP update packet. */
bgp_packet_set_marker(s, BGP_MSG_UPDATE);
"received");
}
+ /* Extended Message Support */
+ if (CHECK_FLAG(p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_ADV)
+ && CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_RCV))
+ json_object_string_add(
+ json_cap, "extendedMessage",
+ "advertisedAndReceived");
+ else if (CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_ADV))
+ json_object_string_add(
+ json_cap, "extendedMessage",
+ "advertised");
+ else if (CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_RCV))
+ json_object_string_add(
+ json_cap, "extendedMessage",
+ "received");
+
/* AddPath */
if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV)
|| CHECK_FLAG(p->cap,
vty_out(vty, "\n");
}
+ /* Extended Message Support */
+ if (CHECK_FLAG(p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_RCV)
+ || CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_ADV)) {
+ vty_out(vty, " Extended Message:");
+ if (CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_ADV))
+ vty_out(vty, " advertised");
+ if (CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_RCV))
+ vty_out(vty, " %sreceived",
+ CHECK_FLAG(
+ p->cap,
+ PEER_CAP_EXTENDED_MESSAGE_ADV)
+ ? "and "
+ : "");
+ vty_out(vty, "\n");
+ }
+
/* AddPath */
if (CHECK_FLAG(p->cap, PEER_CAP_ADDPATH_RCV)
|| CHECK_FLAG(p->cap,
peer->bgp = bgp_lock(bgp);
peer = peer_lock(peer); /* initial reference */
peer->password = NULL;
+ peer->max_packet_size = BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE;
/* Set default flags. */
FOREACH_AFI_SAFI (afi, safi) {
/* We use a larger buffer for peer->obuf_work in the event that:
* - We RX a BGP_UPDATE where the attributes alone are just
- * under BGP_MAX_PACKET_SIZE
+ * under BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE.
* - The user configures an outbound route-map that does many as-path
* prepends or adds many communities. At most they can have
* CMD_ARGC_MAX args in a route-map so there is a finite limit on how
* bounds checking for every single attribute as we construct an
* UPDATE.
*/
- peer->obuf_work =
- stream_new(BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW);
- peer->ibuf_work =
- ringbuf_new(BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX);
+ peer->obuf_work = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
+ + BGP_MAX_PACKET_SIZE_OVERFLOW);
+ peer->ibuf_work = ringbuf_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE
+ * BGP_READ_PACKET_MAX);
- peer->scratch = stream_new(BGP_MAX_PACKET_SIZE);
+ peer->scratch = stream_new(BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE);
bgp_sync_init(peer);
#define BGP_MARKER_SIZE 16
#define BGP_HEADER_SIZE 19
#define BGP_MAX_PACKET_SIZE 4096
+#define BGP_MAX_EXTENDED_MESSAGE_PACKET_SIZE 65535
#define BGP_MAX_PACKET_SIZE_OVERFLOW 1024
/*
#define PEER_CAP_HOSTNAME_RCV (1U << 16) /* hostname received */
#define PEER_CAP_ENHANCED_RR_ADV (1U << 17) /* enhanced rr advertised */
#define PEER_CAP_ENHANCED_RR_RCV (1U << 18) /* enhanced rr received */
+#define PEER_CAP_EXTENDED_MESSAGE_ADV (1U << 19)
+#define PEER_CAP_EXTENDED_MESSAGE_RCV (1U << 20)
/* Capability flags (reset in bgp_stop) */
uint32_t af_cap[AFI_MAX][SAFI_MAX];
/* Sender side AS path loop detection. */
bool as_path_loop_detection;
+ /* Extended Message Support */
+ uint16_t max_packet_size;
+
/* Conditional advertisement */
bool advmap_config_change[AFI_MAX][SAFI_MAX];
bool advmap_table_change;