From 964d1136e8fec5c023cefb18bf73346a2dafaaa8 Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 11:42:19 +0200 Subject: [PATCH 1/6] Add AAAA answers in the built-in DNS server --- pkg/services/dns/dns.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/services/dns/dns.go b/pkg/services/dns/dns.go index d3dd2c817..c0c049c9d 100644 --- a/pkg/services/dns/dns.go +++ b/pkg/services/dns/dns.go @@ -107,7 +107,28 @@ func (h *dnsHandler) addAnswers(m *dns.Msg) { A: ip.IP.To4(), }) } + case dns.TypeAAAA: + ips, err := resolver.LookupIPAddr(context.TODO(), q.Name) + if err != nil { + m.Rcode = dns.RcodeNameError + return + } + for _, ip := range ips { + if len(ip.IP) != net.IPv6len { + continue + } + m.Answer = append(m.Answer, &dns.AAAA{ + Hdr: dns.RR_Header{ + Name: q.Name, + Rrtype: dns.TypeAAAA, + Class: dns.ClassINET, + Ttl: 0, + }, + AAAA: ip.IP.To16(), + }) + } } + } } From b0c909e3694fa4f4e59bc3979d90c9698523c445 Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 11:43:17 +0200 Subject: [PATCH 2/6] Use net.JoinHostPort instead of string concat for joining host and port --- pkg/services/forwarder/tcp.go | 4 ++-- pkg/services/forwarder/udp.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/services/forwarder/tcp.go b/pkg/services/forwarder/tcp.go index 4ea212673..d5797fcd7 100644 --- a/pkg/services/forwarder/tcp.go +++ b/pkg/services/forwarder/tcp.go @@ -2,8 +2,8 @@ package forwarder import ( "context" - "fmt" "net" + "strconv" "sync" "github.com/google/tcpproxy" @@ -31,7 +31,7 @@ func TCP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mute localAddress = replaced } natLock.Unlock() - outbound, err := net.Dial("tcp", fmt.Sprintf("%s:%d", localAddress, r.ID().LocalPort)) + outbound, err := net.Dial("tcp", net.JoinHostPort(localAddress.String(), strconv.Itoa(int(r.ID().LocalPort)))) if err != nil { log.Errorf("net.Dial() = %v", err) r.Complete(true) diff --git a/pkg/services/forwarder/udp.go b/pkg/services/forwarder/udp.go index 3226b14c6..9c5440839 100644 --- a/pkg/services/forwarder/udp.go +++ b/pkg/services/forwarder/udp.go @@ -1,8 +1,8 @@ package forwarder import ( - "fmt" "net" + "strconv" "sync" log "github.com/sirupsen/logrus" @@ -36,7 +36,7 @@ func UDP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mute } p, _ := NewUDPProxy(&autoStoppingListener{underlying: gonet.NewUDPConn(s, &wq, ep)}, func() (net.Conn, error) { - return net.Dial("udp", fmt.Sprintf("%s:%d", localAddress, r.ID().LocalPort)) + return net.Dial("udp", net.JoinHostPort(localAddress.String(), strconv.Itoa(int(r.ID().LocalPort)))) }) go p.Run() }) From 04ac3db0eecdca0d057ae2c4b108f7e05947585e Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 11:53:38 +0200 Subject: [PATCH 3/6] Some MAC addresses can be used for multicast. Treat them as broadcast. It used by NDP protocol. --- pkg/tap/switch.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/tap/switch.go b/pkg/tap/switch.go index f87b39620..0a7a3343b 100644 --- a/pkg/tap/switch.go +++ b/pkg/tap/switch.go @@ -121,7 +121,7 @@ func (e *Switch) tx(src, dst tcpip.LinkAddress, pkt *stack.PacketBuffer) error { e.connLock.Lock() defer e.connLock.Unlock() - if dst == header.EthernetBroadcastAddress { + if dst == header.EthernetBroadcastAddress || header.IsMulticastEthernetAddress(dst) { e.camLock.RLock() srcID, ok := e.cam[src] if !ok { @@ -224,7 +224,7 @@ loop: log.Error(err) } } - if eth.DestinationAddress() == e.gateway.LinkAddress() || eth.DestinationAddress() == header.EthernetBroadcastAddress { + if eth.DestinationAddress() == e.gateway.LinkAddress() || eth.DestinationAddress() == header.EthernetBroadcastAddress || header.IsMulticastEthernetAddress(eth.DestinationAddress()) { vv.TrimFront(header.EthernetMinimumSize) e.gateway.DeliverNetworkPacket( eth.SourceAddress(), From 196fa74be3f03eca1ef65930c460b3d7e99f68e8 Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 12:03:43 +0200 Subject: [PATCH 4/6] Configure the stack to handle IPv6 packets --- cmd/gvproxy/main.go | 1 + pkg/types/configuration.go | 3 + pkg/virtualnetwork/virtualnetwork.go | 12 + .../dhcpv6configurationfromndpra_string.go | 40 + .../gvisor/pkg/tcpip/network/ipv6/icmp.go | 1298 +++++++++ .../gvisor/pkg/tcpip/network/ipv6/ipv6.go | 2369 +++++++++++++++++ .../tcpip/network/ipv6/ipv6_state_autogen.go | 136 + .../gvisor/pkg/tcpip/network/ipv6/mld.go | 286 ++ .../gvisor/pkg/tcpip/network/ipv6/ndp.go | 1943 ++++++++++++++ .../gvisor/pkg/tcpip/network/ipv6/stats.go | 136 + vendor/modules.txt | 1 + 11 files changed, 6225 insertions(+) create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/icmp.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6_state_autogen.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/mld.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ndp.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/stats.go diff --git a/cmd/gvproxy/main.go b/cmd/gvproxy/main.go index c967ff592..8ee97b20a 100644 --- a/cmd/gvproxy/main.go +++ b/cmd/gvproxy/main.go @@ -108,6 +108,7 @@ func main() { MTU: mtu, Subnet: "192.168.127.0/24", GatewayIP: gatewayIP, + GatewayIPv6: "fe80::1", GatewayMacAddress: "5a:94:ef:e4:0c:dd", DHCPStaticLeases: map[string]string{ "192.168.127.2": "5a:94:ef:e4:0c:ee", diff --git a/pkg/types/configuration.go b/pkg/types/configuration.go index e635a8b69..a1b666890 100644 --- a/pkg/types/configuration.go +++ b/pkg/types/configuration.go @@ -22,6 +22,9 @@ type Configuration struct { // IP address of the virtual gateway GatewayIP string + // IPv6 address of the virtual gateway. Prefix will be 64. + GatewayIPv6 string + // MAC address of the virtual gateway GatewayMacAddress string diff --git a/pkg/virtualnetwork/virtualnetwork.go b/pkg/virtualnetwork/virtualnetwork.go index 7b3aeb82c..1dc7d0de4 100644 --- a/pkg/virtualnetwork/virtualnetwork.go +++ b/pkg/virtualnetwork/virtualnetwork.go @@ -13,6 +13,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" @@ -99,6 +100,7 @@ func (n *VirtualNetwork) BytesReceived() uint64 { func createStack(configuration *types.Configuration, endpoint stack.LinkEndpoint) (*stack.Stack, error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ + ipv6.NewProtocol, ipv4.NewProtocol, arp.NewProtocol, }, @@ -106,6 +108,7 @@ func createStack(configuration *types.Configuration, endpoint stack.LinkEndpoint tcp.NewProtocol, udp.NewProtocol, icmp.NewProtocol4, + icmp.NewProtocol6, }, }) @@ -119,6 +122,15 @@ func createStack(configuration *types.Configuration, endpoint stack.LinkEndpoint }, stack.AddressProperties{}); err != nil { return nil, errors.New(err.String()) } + if err := s.AddProtocolAddress(1, tcpip.ProtocolAddress{ + Protocol: ipv6.ProtocolNumber, + AddressWithPrefix: tcpip.AddressWithPrefix{ + Address: tcpip.Address(net.ParseIP(configuration.GatewayIPv6)), + PrefixLen: 64, + }, + }, stack.AddressProperties{}); err != nil { + return nil, errors.New(err.String()) + } s.SetSpoofing(1, true) s.SetPromiscuousMode(1, true) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go new file mode 100644 index 000000000..09ba133b1 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/dhcpv6configurationfromndpra_string.go @@ -0,0 +1,40 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by "stringer -type DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT. + +package ipv6 + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[DHCPv6NoConfiguration-1] + _ = x[DHCPv6ManagedAddress-2] + _ = x[DHCPv6OtherConfigurations-3] +} + +const _DHCPv6ConfigurationFromNDPRA_name = "DHCPv6NoConfigurationDHCPv6ManagedAddressDHCPv6OtherConfigurations" + +var _DHCPv6ConfigurationFromNDPRA_index = [...]uint8{0, 21, 41, 66} + +func (i DHCPv6ConfigurationFromNDPRA) String() string { + i -= 1 + if i < 0 || i >= DHCPv6ConfigurationFromNDPRA(len(_DHCPv6ConfigurationFromNDPRA_index)-1) { + return "DHCPv6ConfigurationFromNDPRA(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _DHCPv6ConfigurationFromNDPRA_name[_DHCPv6ConfigurationFromNDPRA_index[i]:_DHCPv6ConfigurationFromNDPRA_index[i+1]] +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/icmp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/icmp.go new file mode 100644 index 000000000..6c6107264 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/icmp.go @@ -0,0 +1,1298 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6 + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +// icmpv6DestinationUnreachableSockError is a general ICMPv6 Destination +// Unreachable error. +// +// +stateify savable +type icmpv6DestinationUnreachableSockError struct{} + +// Origin implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin { + return tcpip.SockExtErrorOriginICMP6 +} + +// Type implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Type() uint8 { + return uint8(header.ICMPv6DstUnreachable) +} + +// Info implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Info() uint32 { + return 0 +} + +var _ stack.TransportError = (*icmpv6DestinationNetworkUnreachableSockError)(nil) + +// icmpv6DestinationNetworkUnreachableSockError is an ICMPv6 Destination Network +// Unreachable error. +// +// It indicates that the destination network is unreachable. +// +// +stateify savable +type icmpv6DestinationNetworkUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationNetworkUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6NetworkUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationNetworkUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationNetworkUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6DestinationPortUnreachableSockError)(nil) + +// icmpv6DestinationPortUnreachableSockError is an ICMPv6 Destination Port +// Unreachable error. +// +// It indicates that a packet reached the destination host, but the transport +// protocol was not active on the destination port. +// +// +stateify savable +type icmpv6DestinationPortUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationPortUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6PortUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationPortUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6DestinationAddressUnreachableSockError)(nil) + +// icmpv6DestinationAddressUnreachableSockError is an ICMPv6 Destination Address +// Unreachable error. +// +// It indicates that a packet was not able to reach the destination. +// +// +stateify savable +type icmpv6DestinationAddressUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationAddressUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6AddressUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationAddressUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationHostUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6PacketTooBigSockError)(nil) + +// icmpv6PacketTooBigSockError is an ICMPv6 Packet Too Big error. +// +// It indicates that a link exists on the path to the destination with an MTU +// that is too small to carry the packet. +// +// +stateify savable +type icmpv6PacketTooBigSockError struct { + mtu uint32 +} + +// Origin implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Origin() tcpip.SockErrOrigin { + return tcpip.SockExtErrorOriginICMP6 +} + +// Type implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Type() uint8 { + return uint8(header.ICMPv6PacketTooBig) +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Code() uint8 { + return uint8(header.ICMPv6UnusedCode) +} + +// Info implements tcpip.SockErrorCause. +func (e *icmpv6PacketTooBigSockError) Info() uint32 { + return e.mtu +} + +// Kind implements stack.TransportError. +func (*icmpv6PacketTooBigSockError) Kind() stack.TransportErrorKind { + return stack.PacketTooBigTransportError +} + +func (e *endpoint) checkLocalAddress(addr tcpip.Address) bool { + if e.nic.Spoofing() { + return true + } + + if addressEndpoint := e.AcquireAssignedAddress(addr, false, stack.NeverPrimaryEndpoint); addressEndpoint != nil { + addressEndpoint.DecRef() + return true + } + return false +} + +// handleControl handles the case when an ICMP packet contains the headers of +// the original packet that caused the ICMP one to be sent. This information is +// used to find out which transport endpoint must be notified about the ICMP +// packet. +func (e *endpoint) handleControl(transErr stack.TransportError, pkt *stack.PacketBuffer) { + h, ok := pkt.Data().PullUp(header.IPv6MinimumSize) + if !ok { + return + } + hdr := header.IPv6(h) + + // We don't use IsValid() here because ICMP only requires that up to + // 1280 bytes of the original packet be included. So it's likely that it + // is truncated, which would cause IsValid to return false. + // + // Drop packet if it doesn't have the basic IPv6 header or if the + // original source address doesn't match an address we own. + srcAddr := hdr.SourceAddress() + if !e.checkLocalAddress(srcAddr) { + return + } + + // Keep needed information before trimming header. + p := hdr.TransportProtocol() + dstAddr := hdr.DestinationAddress() + + // Skip the IP header, then handle the fragmentation header if there + // is one. + pkt.Data().DeleteFront(header.IPv6MinimumSize) + if p == header.IPv6FragmentHeader { + f, ok := pkt.Data().PullUp(header.IPv6FragmentHeaderSize) + if !ok { + return + } + fragHdr := header.IPv6Fragment(f) + if !fragHdr.IsValid() || fragHdr.FragmentOffset() != 0 { + // We can't handle fragments that aren't at offset 0 + // because they don't have the transport headers. + return + } + p = fragHdr.TransportProtocol() + + // Skip fragmentation header and find out the actual protocol + // number. + pkt.Data().DeleteFront(header.IPv6FragmentHeaderSize) + } + + e.dispatcher.DeliverTransportError(srcAddr, dstAddr, ProtocolNumber, p, transErr, pkt) +} + +// getLinkAddrOption searches NDP options for a given link address option using +// the provided getAddr function as a filter. Returns the link address if +// found; otherwise, returns the zero link address value. Also returns true if +// the options are valid as per the wire format, false otherwise. +func getLinkAddrOption(it header.NDPOptionIterator, getAddr func(header.NDPOption) tcpip.LinkAddress) (tcpip.LinkAddress, bool) { + var linkAddr tcpip.LinkAddress + for { + opt, done, err := it.Next() + if err != nil { + return "", false + } + if done { + break + } + if addr := getAddr(opt); len(addr) != 0 { + // No RFCs define what to do when an NDP message has multiple Link-Layer + // Address options. Since no interface can have multiple link-layer + // addresses, we consider such messages invalid. + if len(linkAddr) != 0 { + return "", false + } + linkAddr = addr + } + } + return linkAddr, true +} + +// getSourceLinkAddr searches NDP options for the source link address option. +// Returns the link address if found; otherwise, returns the zero link address +// value. Also returns true if the options are valid as per the wire format, +// false otherwise. +func getSourceLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) { + return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress { + if src, ok := opt.(header.NDPSourceLinkLayerAddressOption); ok { + return src.EthernetAddress() + } + return "" + }) +} + +// getTargetLinkAddr searches NDP options for the target link address option. +// Returns the link address if found; otherwise, returns the zero link address +// value. Also returns true if the options are valid as per the wire format, +// false otherwise. +func getTargetLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) { + return getLinkAddrOption(it, func(opt header.NDPOption) tcpip.LinkAddress { + if dst, ok := opt.(header.NDPTargetLinkLayerAddressOption); ok { + return dst.EthernetAddress() + } + return "" + }) +} + +func isMLDValid(pkt *stack.PacketBuffer, iph header.IPv6, routerAlert *header.IPv6RouterAlertOption) bool { + // As per RFC 2710 section 3: + // All MLD messages described in this document are sent with a link-local + // IPv6 Source Address, an IPv6 Hop Limit of 1, and an IPv6 Router Alert + // option in a Hop-by-Hop Options header. + if routerAlert == nil || routerAlert.Value != header.IPv6RouterAlertMLD { + return false + } + if pkt.Data().Size() < header.ICMPv6HeaderSize+header.MLDMinimumSize { + return false + } + if iph.HopLimit() != header.MLDHopLimit { + return false + } + if !header.IsV6LinkLocalUnicastAddress(iph.SourceAddress()) { + return false + } + return true +} + +func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool, routerAlert *header.IPv6RouterAlertOption) { + sent := e.stats.icmp.packetsSent + received := e.stats.icmp.packetsReceived + // ICMP packets don't have their TransportHeader fields set. See + // icmp/protocol.go:protocol.Parse for a full explanation. + v, ok := pkt.Data().PullUp(header.ICMPv6HeaderSize) + if !ok { + received.invalid.Increment() + return + } + h := header.ICMPv6(v) + iph := header.IPv6(pkt.NetworkHeader().View()) + srcAddr := iph.SourceAddress() + dstAddr := iph.DestinationAddress() + + // Validate ICMPv6 checksum before processing the packet. + payload := pkt.Data().AsRange().SubRange(len(h)) + if got, want := h.Checksum(), header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: h, + Src: srcAddr, + Dst: dstAddr, + PayloadCsum: payload.Checksum(), + PayloadLen: payload.Size(), + }); got != want { + received.invalid.Increment() + return + } + + isNDPValid := func() bool { + // As per RFC 4861 sections 4.1 - 4.5, 6.1.1, 6.1.2, 7.1.1, 7.1.2 and + // 8.1, nodes MUST silently drop NDP packets where the Hop Limit field + // in the IPv6 header is not set to 255, or the ICMPv6 Code field is not + // set to 0. + // + // As per RFC 6980 section 5, nodes MUST silently drop NDP messages if the + // packet includes a fragmentation header. + return !hasFragmentHeader && iph.HopLimit() == header.NDPHopLimit && h.Code() == 0 + } + + // TODO(b/112892170): Meaningfully handle all ICMP types. + switch icmpType := h.Type(); icmpType { + case header.ICMPv6PacketTooBig: + received.packetTooBig.Increment() + hdr, ok := pkt.Data().PullUp(header.ICMPv6PacketTooBigMinimumSize) + if !ok { + received.invalid.Increment() + return + } + networkMTU, err := calculateNetworkMTU(header.ICMPv6(hdr).MTU(), header.IPv6MinimumSize) + if err != nil { + networkMTU = 0 + } + pkt.Data().DeleteFront(header.ICMPv6PacketTooBigMinimumSize) + e.handleControl(&icmpv6PacketTooBigSockError{mtu: networkMTU}, pkt) + + case header.ICMPv6DstUnreachable: + received.dstUnreachable.Increment() + hdr, ok := pkt.Data().PullUp(header.ICMPv6DstUnreachableMinimumSize) + if !ok { + received.invalid.Increment() + return + } + code := header.ICMPv6(hdr).Code() + pkt.Data().DeleteFront(header.ICMPv6DstUnreachableMinimumSize) + switch code { + case header.ICMPv6NetworkUnreachable: + e.handleControl(&icmpv6DestinationNetworkUnreachableSockError{}, pkt) + case header.ICMPv6PortUnreachable: + e.handleControl(&icmpv6DestinationPortUnreachableSockError{}, pkt) + } + case header.ICMPv6NeighborSolicit: + received.neighborSolicit.Increment() + if !isNDPValid() || pkt.Data().Size() < header.ICMPv6NeighborSolicitMinimumSize { + received.invalid.Increment() + return + } + + // The remainder of payload must be only the neighbor solicitation, so + // payload.AsView() always returns the solicitation. Per RFC 6980 section 5, + // NDP messages cannot be fragmented. Also note that in the common case NDP + // datagrams are very small and AsView() will not incur allocations. + ns := header.NDPNeighborSolicit(payload.AsView()) + targetAddr := ns.TargetAddress() + + // As per RFC 4861 section 4.3, the Target Address MUST NOT be a multicast + // address. + if header.IsV6MulticastAddress(targetAddr) { + received.invalid.Increment() + return + } + + var it header.NDPOptionIterator + { + var err error + it, err = ns.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the + // packet. + received.invalid.Increment() + return + } + } + + if e.hasTentativeAddr(targetAddr) { + // If the target address is tentative and the source of the packet is a + // unicast (specified) address, then the source of the packet is + // attempting to perform address resolution on the target. In this case, + // the solicitation is silently ignored, as per RFC 4862 section 5.4.3. + // + // If the target address is tentative and the source of the packet is the + // unspecified address (::), then we know another node is also performing + // DAD for the same address (since the target address is tentative for us, + // we know we are also performing DAD on it). In this case we let the + // stack know so it can handle such a scenario and do nothing further with + // the NS. + if srcAddr == header.IPv6Any { + var nonce []byte + for { + opt, done, err := it.Next() + if err != nil { + received.invalid.Increment() + return + } + if done { + break + } + if n, ok := opt.(header.NDPNonceOption); ok { + nonce = n.Nonce() + break + } + } + + // Since this is a DAD message we know the sender does not actually hold + // the target address so there is no "holder". + var holderLinkAddress tcpip.LinkAddress + + // We would get an error if the address no longer exists or the address + // is no longer tentative (DAD resolved between the call to + // hasTentativeAddr and this point). Both of these are valid scenarios: + // 1) An address may be removed at any time. + // 2) As per RFC 4862 section 5.4, DAD is not a perfect: + // "Note that the method for detecting duplicates + // is not completely reliable, and it is possible that duplicate + // addresses will still exist" + // + // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate + // address is detected for an assigned address. + switch err := e.dupTentativeAddrDetected(targetAddr, holderLinkAddress, nonce); err.(type) { + case nil, *tcpip.ErrBadAddress, *tcpip.ErrInvalidEndpointState: + default: + panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err)) + } + } + + // Do not handle neighbor solicitations targeted to an address that is + // tentative on the NIC any further. + return + } + + // At this point we know that the target address is not tentative on the NIC + // so the packet is processed as defined in RFC 4861, as per RFC 4862 + // section 5.4.3. + + // Is the NS targeting us? + if !e.checkLocalAddress(targetAddr) { + return + } + + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { + received.invalid.Increment() + return + } + + // As per RFC 4861 section 4.3, the Source Link-Layer Address Option MUST + // NOT be included when the source IP address is the unspecified address. + // Otherwise, on link layers that have addresses this option MUST be + // included in multicast solicitations and SHOULD be included in unicast + // solicitations. + unspecifiedSource := srcAddr == header.IPv6Any + if len(sourceLinkAddr) == 0 { + if header.IsV6MulticastAddress(dstAddr) && !unspecifiedSource { + received.invalid.Increment() + return + } + } else if unspecifiedSource { + received.invalid.Increment() + return + } else { + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, srcAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) + } + } + + // As per RFC 4861 section 7.1.1: + // A node MUST silently discard any received Neighbor Solicitation + // messages that do not satisfy all of the following validity checks: + // ... + // - If the IP source address is the unspecified address, the IP + // destination address is a solicited-node multicast address. + if unspecifiedSource && !header.IsSolicitedNodeAddr(dstAddr) { + received.invalid.Increment() + return + } + + // As per RFC 4861 section 7.2.4: + // + // If the source of the solicitation is the unspecified address, the node + // MUST [...] and multicast the advertisement to the all-nodes address. + // + remoteAddr := srcAddr + if unspecifiedSource { + remoteAddr = header.IPv6AllNodesMulticastAddress + } + + // Even if we were able to receive a packet from some remote, we may not + // have a route to it - the remote may be blocked via routing rules. We must + // always consult our routing table and find a route to the remote before + // sending any packet. + r, err := e.protocol.stack.FindRoute(e.nic.ID(), targetAddr, remoteAddr, ProtocolNumber, false /* multicastLoop */) + if err != nil { + // If we cannot find a route to the destination, silently drop the packet. + return + } + defer r.Release() + + // If the NS has a source link-layer option, resolve the route immediately + // to avoid querying the neighbor table when the neighbor entry was updated + // as probing the neighbor table for a link address will transition the + // entry's state from stale to delay. + // + // Note, if the source link address is unspecified and this is a unicast + // solicitation, we may need to perform neighbor discovery to send the + // neighbor advertisement response. This is expected as per RFC 4861 section + // 7.2.4: + // + // Because unicast Neighbor Solicitations are not required to include a + // Source Link-Layer Address, it is possible that a node sending a + // solicited Neighbor Advertisement does not have a corresponding link- + // layer address for its neighbor in its Neighbor Cache. In such + // situations, a node will first have to use Neighbor Discovery to + // determine the link-layer address of its neighbor (i.e., send out a + // multicast Neighbor Solicitation). + // + if len(sourceLinkAddr) != 0 { + r.ResolveWith(sourceLinkAddr) + } + + optsSerializer := header.NDPOptionsSerializer{ + header.NDPTargetLinkLayerAddressOption(e.nic.LinkAddress()), + } + neighborAdvertSize := header.ICMPv6NeighborAdvertMinimumSize + optsSerializer.Length() + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(r.MaxHeaderLength()) + neighborAdvertSize, + }) + pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber + packet := header.ICMPv6(pkt.TransportHeader().Push(neighborAdvertSize)) + packet.SetType(header.ICMPv6NeighborAdvert) + na := header.NDPNeighborAdvert(packet.MessageBody()) + + // As per RFC 4861 section 7.2.4: + // + // If the source of the solicitation is the unspecified address, the node + // MUST set the Solicited flag to zero and [..]. Otherwise, the node MUST + // set the Solicited flag to one and [..]. + // + na.SetSolicitedFlag(!unspecifiedSource) + na.SetOverrideFlag(true) + na.SetTargetAddress(targetAddr) + na.Options().Serialize(optsSerializer) + packet.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: packet, + Src: r.LocalAddress(), + Dst: r.RemoteAddress(), + })) + + // RFC 4861 Neighbor Discovery for IP version 6 (IPv6) + // + // 7.1.2. Validation of Neighbor Advertisements + // + // The IP Hop Limit field has a value of 255, i.e., the packet + // could not possibly have been forwarded by a router. + if err := r.WritePacket(stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, TOS: stack.DefaultTOS}, pkt); err != nil { + sent.dropped.Increment() + return + } + sent.neighborAdvert.Increment() + + case header.ICMPv6NeighborAdvert: + received.neighborAdvert.Increment() + if !isNDPValid() || pkt.Data().Size() < header.ICMPv6NeighborAdvertMinimumSize { + received.invalid.Increment() + return + } + + // The remainder of payload must be only the neighbor advertisement, so + // payload.AsView() always returns the advertisement. Per RFC 6980 section + // 5, NDP messages cannot be fragmented. Also note that in the common case + // NDP datagrams are very small and AsView() will not incur allocations. + na := header.NDPNeighborAdvert(payload.AsView()) + + it, err := na.Options().Iter(false /* check */) + if err != nil { + // If we have a malformed NDP NA option, drop the packet. + received.invalid.Increment() + return + } + + targetLinkAddr, ok := getTargetLinkAddr(it) + if !ok { + received.invalid.Increment() + return + } + + targetAddr := na.TargetAddress() + + e.dad.mu.Lock() + e.dad.mu.dad.StopLocked(targetAddr, &stack.DADDupAddrDetected{HolderLinkAddress: targetLinkAddr}) + e.dad.mu.Unlock() + + if e.hasTentativeAddr(targetAddr) { + // We only send a nonce value in DAD messages to check for loopedback + // messages so we use the empty nonce value here. + var nonce []byte + + // We just got an NA from a node that owns an address we are performing + // DAD on, implying the address is not unique. In this case we let the + // stack know so it can handle such a scenario and do nothing furthur with + // the NDP NA. + // + // We would get an error if the address no longer exists or the address + // is no longer tentative (DAD resolved between the call to + // hasTentativeAddr and this point). Both of these are valid scenarios: + // 1) An address may be removed at any time. + // 2) As per RFC 4862 section 5.4, DAD is not a perfect: + // "Note that the method for detecting duplicates + // is not completely reliable, and it is possible that duplicate + // addresses will still exist" + // + // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate + // address is detected for an assigned address. + switch err := e.dupTentativeAddrDetected(targetAddr, targetLinkAddr, nonce); err.(type) { + case nil, *tcpip.ErrBadAddress, *tcpip.ErrInvalidEndpointState: + return + default: + panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err)) + } + } + + // At this point we know that the target address is not tentative on the + // NIC. However, the target address may still be assigned to the NIC but not + // tentative (it could be permanent). Such a scenario is beyond the scope of + // RFC 4862. As such, we simply ignore such a scenario for now and proceed + // as normal. + // + // TODO(b/143147598): Handle the scenario described above. Also inform the + // netstack integration that a duplicate address was detected outside of + // DAD. + + // As per RFC 4861 section 7.1.2: + // A node MUST silently discard any received Neighbor Advertisement + // messages that do not satisfy all of the following validity checks: + // ... + // - If the IP Destination Address is a multicast address the + // Solicited flag is zero. + if header.IsV6MulticastAddress(dstAddr) && na.SolicitedFlag() { + received.invalid.Increment() + return + } + + // If the NA message has the target link layer option, update the link + // address cache with the link address for the target of the message. + switch err := e.nic.HandleNeighborConfirmation(ProtocolNumber, targetAddr, targetLinkAddr, stack.ReachabilityConfirmationFlags{ + Solicited: na.SolicitedFlag(), + Override: na.OverrideFlag(), + IsRouter: na.RouterFlag(), + }); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor confirmation message: %s", err)) + } + + case header.ICMPv6EchoRequest: + received.echoRequest.Increment() + icmpHdr, ok := pkt.TransportHeader().Consume(header.ICMPv6EchoMinimumSize) + if !ok { + received.invalid.Increment() + return + } + + // As per RFC 4291 section 2.7, multicast addresses must not be used as + // source addresses in IPv6 packets. + localAddr := dstAddr + if header.IsV6MulticastAddress(dstAddr) { + localAddr = "" + } + + r, err := e.protocol.stack.FindRoute(e.nic.ID(), localAddr, srcAddr, ProtocolNumber, false /* multicastLoop */) + if err != nil { + // If we cannot find a route to the destination, silently drop the packet. + return + } + defer r.Release() + + if !e.protocol.allowICMPReply(header.ICMPv6EchoReply) { + sent.rateLimited.Increment() + return + } + + replyPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize, + Data: pkt.Data().ExtractVV(), + }) + icmp := header.ICMPv6(replyPkt.TransportHeader().Push(header.ICMPv6EchoMinimumSize)) + pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber + copy(icmp, icmpHdr) + icmp.SetType(header.ICMPv6EchoReply) + dataRange := replyPkt.Data().AsRange() + icmp.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: icmp, + Src: r.LocalAddress(), + Dst: r.RemoteAddress(), + PayloadCsum: dataRange.Checksum(), + PayloadLen: dataRange.Size(), + })) + if err := r.WritePacket(stack.NetworkHeaderParams{ + Protocol: header.ICMPv6ProtocolNumber, + TTL: r.DefaultTTL(), + TOS: stack.DefaultTOS, + }, replyPkt); err != nil { + sent.dropped.Increment() + return + } + sent.echoReply.Increment() + + case header.ICMPv6EchoReply: + received.echoReply.Increment() + if pkt.Data().Size() < header.ICMPv6EchoMinimumSize { + received.invalid.Increment() + return + } + e.dispatcher.DeliverTransportPacket(header.ICMPv6ProtocolNumber, pkt) + + case header.ICMPv6TimeExceeded: + received.timeExceeded.Increment() + + case header.ICMPv6ParamProblem: + received.paramProblem.Increment() + + case header.ICMPv6RouterSolicit: + received.routerSolicit.Increment() + + // + // Validate the RS as per RFC 4861 section 6.1.1. + // + + // Is the NDP payload of sufficient size to hold a Router Solictation? + if !isNDPValid() || pkt.Data().Size()-header.ICMPv6HeaderSize < header.NDPRSMinimumSize { + received.invalid.Increment() + return + } + + if !e.Forwarding() { + received.routerOnlyPacketsDroppedByHost.Increment() + return + } + + // Note that in the common case NDP datagrams are very small and AsView() + // will not incur allocations. + rs := header.NDPRouterSolicit(payload.AsView()) + it, err := rs.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the packet. + received.invalid.Increment() + return + } + + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { + received.invalid.Increment() + return + } + + // If the RS message has the source link layer option, update the link + // address cache with the link address for the source of the message. + if len(sourceLinkAddr) != 0 { + // As per RFC 4861 section 4.1, the Source Link-Layer Address Option MUST + // NOT be included when the source IP address is the unspecified address. + // Otherwise, it SHOULD be included on link layers that have addresses. + if srcAddr == header.IPv6Any { + received.invalid.Increment() + return + } + + // A RS with a specified source IP address modifies the neighbor table + // in the same way a regular probe would. + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, srcAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) + } + } + + case header.ICMPv6RouterAdvert: + received.routerAdvert.Increment() + + // + // Validate the RA as per RFC 4861 section 6.1.2. + // + + // Is the NDP payload of sufficient size to hold a Router Advertisement? + if !isNDPValid() || pkt.Data().Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize { + received.invalid.Increment() + return + } + + routerAddr := srcAddr + + // Is the IP Source Address a link-local address? + if !header.IsV6LinkLocalUnicastAddress(routerAddr) { + // ...No, silently drop the packet. + received.invalid.Increment() + return + } + + // Note that in the common case NDP datagrams are very small and AsView() + // will not incur allocations. + ra := header.NDPRouterAdvert(payload.AsView()) + it, err := ra.Options().Iter(false /* check */) + if err != nil { + // Options are not valid as per the wire format, silently drop the packet. + received.invalid.Increment() + return + } + + sourceLinkAddr, ok := getSourceLinkAddr(it) + if !ok { + received.invalid.Increment() + return + } + + // + // At this point, we have a valid Router Advertisement, as far + // as RFC 4861 section 6.1.2 is concerned. + // + + // If the RA has the source link layer option, update the link address + // cache with the link address for the advertised router. + if len(sourceLinkAddr) != 0 { + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, routerAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) + } + } + + e.mu.Lock() + e.mu.ndp.handleRA(routerAddr, ra) + e.mu.Unlock() + + case header.ICMPv6RedirectMsg: + // TODO(gvisor.dev/issue/2285): Call `e.nud.HandleProbe` after validating + // this redirect message, as per RFC 4871 section 7.3.3: + // + // "A Neighbor Cache entry enters the STALE state when created as a + // result of receiving packets other than solicited Neighbor + // Advertisements (i.e., Router Solicitations, Router Advertisements, + // Redirects, and Neighbor Solicitations). These packets contain the + // link-layer address of either the sender or, in the case of Redirect, + // the redirection target. However, receipt of these link-layer + // addresses does not confirm reachability of the forward-direction path + // to that node. Placing a newly created Neighbor Cache entry for which + // the link-layer address is known in the STALE state provides assurance + // that path failures are detected quickly. In addition, should a cached + // link-layer address be modified due to receiving one of the above + // messages, the state SHOULD also be set to STALE to provide prompt + // verification that the path to the new link-layer address is working." + received.redirectMsg.Increment() + if !isNDPValid() { + received.invalid.Increment() + return + } + + case header.ICMPv6MulticastListenerQuery, header.ICMPv6MulticastListenerReport, header.ICMPv6MulticastListenerDone: + switch icmpType { + case header.ICMPv6MulticastListenerQuery: + received.multicastListenerQuery.Increment() + case header.ICMPv6MulticastListenerReport: + received.multicastListenerReport.Increment() + case header.ICMPv6MulticastListenerDone: + received.multicastListenerDone.Increment() + default: + panic(fmt.Sprintf("unrecognized MLD message = %d", icmpType)) + } + + if !isMLDValid(pkt, iph, routerAlert) { + received.invalid.Increment() + return + } + + switch icmpType { + case header.ICMPv6MulticastListenerQuery: + e.mu.Lock() + e.mu.mld.handleMulticastListenerQuery(header.MLD(payload.AsView())) + e.mu.Unlock() + case header.ICMPv6MulticastListenerReport: + e.mu.Lock() + e.mu.mld.handleMulticastListenerReport(header.MLD(payload.AsView())) + e.mu.Unlock() + case header.ICMPv6MulticastListenerDone: + default: + panic(fmt.Sprintf("unrecognized MLD message = %d", icmpType)) + } + + default: + received.unrecognized.Increment() + } +} + +// LinkAddressProtocol implements stack.LinkAddressResolver. +func (*endpoint) LinkAddressProtocol() tcpip.NetworkProtocolNumber { + return header.IPv6ProtocolNumber +} + +// LinkAddressRequest implements stack.LinkAddressResolver. +func (e *endpoint) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error { + remoteAddr := targetAddr + if len(remoteLinkAddr) == 0 { + remoteAddr = header.SolicitedNodeAddr(targetAddr) + remoteLinkAddr = header.EthernetAddressFromMulticastIPv6Address(remoteAddr) + } + + if len(localAddr) == 0 { + // Find an address that we can use as our source address. + addressEndpoint := e.AcquireOutgoingPrimaryAddress(remoteAddr, false /* allowExpired */) + if addressEndpoint == nil { + return &tcpip.ErrNetworkUnreachable{} + } + + localAddr = addressEndpoint.AddressWithPrefix().Address + addressEndpoint.DecRef() + } else if !e.checkLocalAddress(localAddr) { + // The provided local address is not assigned to us. + return &tcpip.ErrBadLocalAddress{} + } + + return e.sendNDPNS(localAddr, remoteAddr, targetAddr, remoteLinkAddr, header.NDPOptionsSerializer{ + header.NDPSourceLinkLayerAddressOption(e.nic.LinkAddress()), + }) +} + +// ResolveStaticAddress implements stack.LinkAddressResolver. +func (*endpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { + if header.IsV6MulticastAddress(addr) { + return header.EthernetAddressFromMulticastIPv6Address(addr), true + } + return tcpip.LinkAddress([]byte(nil)), false +} + +// ======= ICMP Error packet generation ========= + +// icmpReason is a marker interface for IPv6 specific ICMP errors. +type icmpReason interface { + isICMPReason() + // isForwarding indicates whether or not the error arose while attempting to + // forward a packet. + isForwarding() bool + // respondToMulticast indicates whether this error falls under the exception + // outlined by RFC 4443 section 2.4 point e.3 exception 2: + // + // (e.3) A packet destined to an IPv6 multicast address. (There are two + // exceptions to this rule: (1) the Packet Too Big Message (Section 3.2) to + // allow Path MTU discovery to work for IPv6 multicast, and (2) the Parameter + // Problem Message, Code 2 (Section 3.4) reporting an unrecognized IPv6 + // option (see Section 4.2 of [IPv6]) that has the Option Type highest- + // order two bits set to 10). + respondsToMulticast() bool +} + +// icmpReasonParameterProblem is an error during processing of extension headers +// or the fixed header defined in RFC 4443 section 3.4. +type icmpReasonParameterProblem struct { + code header.ICMPv6Code + + // pointer is defined in the RFC 4443 setion 3.4 which reads: + // + // Pointer Identifies the octet offset within the invoking packet + // where the error was detected. + // + // The pointer will point beyond the end of the ICMPv6 + // packet if the field in error is beyond what can fit + // in the maximum size of an ICMPv6 error message. + pointer uint32 + + forwarding bool + + respondToMulticast bool +} + +func (*icmpReasonParameterProblem) isICMPReason() {} +func (p *icmpReasonParameterProblem) isForwarding() bool { + return p.forwarding +} + +func (p *icmpReasonParameterProblem) respondsToMulticast() bool { + return p.respondToMulticast +} + +// icmpReasonPortUnreachable is an error where the transport protocol has no +// listener and no alternative means to inform the sender. +type icmpReasonPortUnreachable struct{} + +func (*icmpReasonPortUnreachable) isICMPReason() {} + +func (*icmpReasonPortUnreachable) isForwarding() bool { + return false +} + +func (*icmpReasonPortUnreachable) respondsToMulticast() bool { + return false +} + +// icmpReasonNetUnreachable is an error where no route can be found to the +// network of the final destination. +type icmpReasonNetUnreachable struct{} + +func (*icmpReasonNetUnreachable) isICMPReason() {} + +func (*icmpReasonNetUnreachable) isForwarding() bool { + // If we hit a Network Unreachable error, then we also know we are + // operating as a router. As per RFC 4443 section 3.1: + // + // If the reason for the failure to deliver is lack of a matching + // entry in the forwarding node's routing table, the Code field is + // set to 0 (Network Unreachable). + return true +} + +func (*icmpReasonNetUnreachable) respondsToMulticast() bool { + return false +} + +// icmpReasonHostUnreachable is an error in which the host specified in the +// internet destination field of the datagram is unreachable. +type icmpReasonHostUnreachable struct{} + +func (*icmpReasonHostUnreachable) isICMPReason() {} +func (*icmpReasonHostUnreachable) isForwarding() bool { + // If we hit a Host Unreachable error, then we know we are operating as a + // router. As per RFC 4443 page 8, Destination Unreachable Message, + // + // If the reason for the failure to deliver cannot be mapped to any of + // other codes, the Code field is set to 3. Example of such cases are + // an inability to resolve the IPv6 destination address into a + // corresponding link address, or a link-specific problem of some sort. + return true +} + +func (*icmpReasonHostUnreachable) respondsToMulticast() bool { + return false +} + +// icmpReasonFragmentationNeeded is an error where a packet is to big to be sent +// out through the outgoing MTU, as per RFC 4443 page 9, Packet Too Big Message. +type icmpReasonPacketTooBig struct{} + +func (*icmpReasonPacketTooBig) isICMPReason() {} + +func (*icmpReasonPacketTooBig) isForwarding() bool { + // If we hit a Packet Too Big error, then we know we are operating as a router. + // As per RFC 4443 section 3.2: + // + // A Packet Too Big MUST be sent by a router in response to a packet that it + // cannot forward because the packet is larger than the MTU of the outgoing + // link. + return true +} + +func (*icmpReasonPacketTooBig) respondsToMulticast() bool { + return true +} + +// icmpReasonHopLimitExceeded is an error where a packet's hop limit exceeded in +// transit to its final destination, as per RFC 4443 section 3.3. +type icmpReasonHopLimitExceeded struct{} + +func (*icmpReasonHopLimitExceeded) isICMPReason() {} + +func (*icmpReasonHopLimitExceeded) isForwarding() bool { + // If we hit a Hop Limit Exceeded error, then we know we are operating + // as a router. As per RFC 4443 section 3.3: + // + // If a router receives a packet with a Hop Limit of zero, or if a + // router decrements a packet's Hop Limit to zero, it MUST discard + // the packet and originate an ICMPv6 Time Exceeded message with Code + // 0 to the source of the packet. This indicates either a routing + // loop or too small an initial Hop Limit value. + return true +} + +func (*icmpReasonHopLimitExceeded) respondsToMulticast() bool { + return false +} + +// icmpReasonReassemblyTimeout is an error where insufficient fragments are +// received to complete reassembly of a packet within a configured time after +// the reception of the first-arriving fragment of that packet. +type icmpReasonReassemblyTimeout struct{} + +func (*icmpReasonReassemblyTimeout) isICMPReason() {} + +func (*icmpReasonReassemblyTimeout) isForwarding() bool { + return false +} + +func (*icmpReasonReassemblyTimeout) respondsToMulticast() bool { + return false +} + +// returnError takes an error descriptor and generates the appropriate ICMP +// error packet for IPv6 and sends it. +func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error { + origIPHdr := header.IPv6(pkt.NetworkHeader().View()) + origIPHdrSrc := origIPHdr.SourceAddress() + origIPHdrDst := origIPHdr.DestinationAddress() + + // Only send ICMP error if the address is not a multicast v6 + // address and the source is not the unspecified address. + // + // There are exceptions to this rule. + // See: point e.3) RFC 4443 section-2.4 + // + // (e) An ICMPv6 error message MUST NOT be originated as a result of + // receiving the following: + // + // (e.1) An ICMPv6 error message. + // + // (e.2) An ICMPv6 redirect message [IPv6-DISC]. + // + // (e.3) A packet destined to an IPv6 multicast address. (There are + // two exceptions to this rule: (1) the Packet Too Big Message + // (Section 3.2) to allow Path MTU discovery to work for IPv6 + // multicast, and (2) the Parameter Problem Message, Code 2 + // (Section 3.4) reporting an unrecognized IPv6 option (see + // Section 4.2 of [IPv6]) that has the Option Type highest- + // order two bits set to 10). + // + allowResponseToMulticast := reason.respondsToMulticast() + isOrigDstMulticast := header.IsV6MulticastAddress(origIPHdrDst) + if (!allowResponseToMulticast && isOrigDstMulticast) || origIPHdrSrc == header.IPv6Any { + return nil + } + + // If we are operating as a router, do not use the packet's destination + // address as the response's source address as we should not own the + // destination address of a packet we are forwarding. + // + // If the packet was originally destined to a multicast address, then do not + // use the packet's destination address as the source for the response ICMP + // packet as "multicast addresses must not be used as source addresses in IPv6 + // packets", as per RFC 4291 section 2.7. + localAddr := origIPHdrDst + if reason.isForwarding() || isOrigDstMulticast { + localAddr = "" + } + // Even if we were able to receive a packet from some remote, we may not have + // a route to it - the remote may be blocked via routing rules. We must always + // consult our routing table and find a route to the remote before sending any + // packet. + route, err := p.stack.FindRoute(pkt.NICID, localAddr, origIPHdrSrc, ProtocolNumber, false /* multicastLoop */) + if err != nil { + return err + } + defer route.Release() + + p.mu.Lock() + // We retrieve an endpoint using the newly constructed route's NICID rather + // than the packet's NICID. The packet's NICID corresponds to the NIC on + // which it arrived, which isn't necessarily the same as the NIC on which it + // will be transmitted. On the other hand, the route's NIC *is* guaranteed + // to be the NIC on which the packet will be transmitted. + netEP, ok := p.mu.eps[route.NICID()] + p.mu.Unlock() + if !ok { + return &tcpip.ErrNotConnected{} + } + + if pkt.TransportProtocolNumber == header.ICMPv6ProtocolNumber { + // TODO(gvisor.dev/issues/3810): Sort this out when ICMP headers are stored. + // Unfortunately at this time ICMP Packets do not have a transport + // header separated out. It is in the Data part so we need to + // separate it out now. We will just pretend it is a minimal length + // ICMP packet as we don't really care if any later bits of a + // larger ICMP packet are in the header view or in the Data view. + transport, ok := pkt.TransportHeader().Consume(header.ICMPv6MinimumSize) + if !ok { + return nil + } + typ := header.ICMPv6(transport).Type() + if typ.IsErrorType() || typ == header.ICMPv6RedirectMsg { + return nil + } + } + + sent := netEP.stats.icmp.packetsSent + icmpType, icmpCode, counter, typeSpecific := func() (header.ICMPv6Type, header.ICMPv6Code, tcpip.MultiCounterStat, uint32) { + switch reason := reason.(type) { + case *icmpReasonParameterProblem: + return header.ICMPv6ParamProblem, reason.code, sent.paramProblem, reason.pointer + case *icmpReasonPortUnreachable: + return header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, sent.dstUnreachable, 0 + case *icmpReasonNetUnreachable: + return header.ICMPv6DstUnreachable, header.ICMPv6NetworkUnreachable, sent.dstUnreachable, 0 + case *icmpReasonHostUnreachable: + return header.ICMPv6DstUnreachable, header.ICMPv6AddressUnreachable, sent.dstUnreachable, 0 + case *icmpReasonPacketTooBig: + return header.ICMPv6PacketTooBig, header.ICMPv6UnusedCode, sent.packetTooBig, 0 + case *icmpReasonHopLimitExceeded: + return header.ICMPv6TimeExceeded, header.ICMPv6HopLimitExceeded, sent.timeExceeded, 0 + case *icmpReasonReassemblyTimeout: + return header.ICMPv6TimeExceeded, header.ICMPv6ReassemblyTimeout, sent.timeExceeded, 0 + default: + panic(fmt.Sprintf("unsupported ICMP type %T", reason)) + } + }() + + if !p.allowICMPReply(icmpType) { + sent.rateLimited.Increment() + return nil + } + + network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View() + + // As per RFC 4443 section 2.4 + // + // (c) Every ICMPv6 error message (type < 128) MUST include + // as much of the IPv6 offending (invoking) packet (the + // packet that caused the error) as possible without making + // the error message packet exceed the minimum IPv6 MTU + // [IPv6]. + mtu := int(route.MTU()) + const maxIPv6Data = header.IPv6MinimumMTU - header.IPv6FixedHeaderSize + if mtu > maxIPv6Data { + mtu = maxIPv6Data + } + available := mtu - header.ICMPv6ErrorHeaderSize + if available < header.IPv6MinimumSize { + return nil + } + payloadLen := network.Size() + transport.Size() + pkt.Data().Size() + if payloadLen > available { + payloadLen = available + } + payload := network.ToVectorisedView() + payload.AppendView(transport) + payload.Append(pkt.Data().ExtractVV()) + payload.CapLength(payloadLen) + + newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(route.MaxHeaderLength()) + header.ICMPv6ErrorHeaderSize, + Data: payload, + }) + newPkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber + + icmpHdr := header.ICMPv6(newPkt.TransportHeader().Push(header.ICMPv6DstUnreachableMinimumSize)) + icmpHdr.SetType(icmpType) + icmpHdr.SetCode(icmpCode) + icmpHdr.SetTypeSpecific(typeSpecific) + + dataRange := newPkt.Data().AsRange() + icmpHdr.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: icmpHdr, + Src: route.LocalAddress(), + Dst: route.RemoteAddress(), + PayloadCsum: dataRange.Checksum(), + PayloadLen: dataRange.Size(), + })) + if err := route.WritePacket( + stack.NetworkHeaderParams{ + Protocol: header.ICMPv6ProtocolNumber, + TTL: route.DefaultTTL(), + TOS: stack.DefaultTOS, + }, + newPkt, + ); err != nil { + sent.dropped.Increment() + return err + } + counter.Increment() + return nil +} + +// OnReassemblyTimeout implements fragmentation.TimeoutHandler. +func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) { + // OnReassemblyTimeout sends a Time Exceeded Message as per RFC 2460 Section + // 4.5: + // + // If the first fragment (i.e., the one with a Fragment Offset of zero) has + // been received, an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded + // message should be sent to the source of that fragment. + if pkt != nil { + p.returnError(&icmpReasonReassemblyTimeout{}, pkt) + } +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6.go new file mode 100644 index 000000000..e2d2cf907 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6.go @@ -0,0 +1,2369 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ipv6 contains the implementation of the ipv6 network protocol. +package ipv6 + +import ( + "encoding/binary" + "fmt" + "hash/fnv" + "math" + "reflect" + "sort" + "sync/atomic" + "time" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/header/parse" + "gvisor.dev/gvisor/pkg/tcpip/network/hash" + "gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation" + "gvisor.dev/gvisor/pkg/tcpip/network/internal/ip" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +const ( + // ReassembleTimeout controls how long a fragment will be held. + // As per RFC 8200 section 4.5: + // + // If insufficient fragments are received to complete reassembly of a packet + // within 60 seconds of the reception of the first-arriving fragment of that + // packet, reassembly of that packet must be abandoned. + // + // Linux also uses 60 seconds for reassembly timeout: + // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456 + ReassembleTimeout = 60 * time.Second + + // ProtocolNumber is the ipv6 protocol number. + ProtocolNumber = header.IPv6ProtocolNumber + + // maxPayloadSize is the maximum size that can be encoded in the 16-bit + // PayloadLength field of the ipv6 header. + maxPayloadSize = 0xffff + + // DefaultTTL is the default hop limit for IPv6 Packets egressed by + // Netstack. + DefaultTTL = 64 + + // buckets for fragment identifiers + buckets = 2048 +) + +const ( + forwardingDisabled = 0 + forwardingEnabled = 1 +) + +// policyTable is the default policy table defined in RFC 6724 section 2.1. +// +// A more human-readable version: +// +// Prefix Precedence Label +// ::1/128 50 0 +// ::/0 40 1 +// ::ffff:0:0/96 35 4 +// 2002::/16 30 2 +// 2001::/32 5 5 +// fc00::/7 3 13 +// ::/96 1 3 +// fec0::/10 1 11 +// 3ffe::/16 1 12 +// +// The table is sorted by prefix length so longest-prefix match can be easily +// achieved. +// +// We willingly left out ::/96, fec0::/10 and 3ffe::/16 since those prefix +// assignments are deprecated. +// +// As per RFC 4291 section 2.5.5.1 (for ::/96), +// +// The "IPv4-Compatible IPv6 address" is now deprecated because the +// current IPv6 transition mechanisms no longer use these addresses. +// New or updated implementations are not required to support this +// address type. +// +// As per RFC 3879 section 4 (for fec0::/10), +// +// This document formally deprecates the IPv6 site-local unicast prefix +// defined in [RFC3513], i.e., 1111111011 binary or FEC0::/10. +// +// As per RFC 3701 section 1 (for 3ffe::/16), +// +// As clearly stated in [TEST-NEW], the addresses for the 6bone are +// temporary and will be reclaimed in the future. It further states +// that all users of these addresses (within the 3FFE::/16 prefix) will +// be required to renumber at some time in the future. +// +// and section 2, +// +// Thus after the pTLA allocation cutoff date January 1, 2004, it is +// REQUIRED that no new 6bone 3FFE pTLAs be allocated. +// +// MUST NOT BE MODIFIED. +var policyTable = [...]struct { + subnet tcpip.Subnet + + label uint8 +}{ + // ::1/128 + { + subnet: header.IPv6Loopback.WithPrefix().Subnet(), + label: 0, + }, + // ::ffff:0:0/96 + { + subnet: header.IPv4MappedIPv6Subnet, + label: 4, + }, + // 2001::/32 (Teredo prefix as per RFC 4380 section 2.6). + { + subnet: tcpip.AddressWithPrefix{ + Address: "\x20\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + PrefixLen: 32, + }.Subnet(), + label: 5, + }, + // 2002::/16 (6to4 prefix as per RFC 3056 section 2). + { + subnet: tcpip.AddressWithPrefix{ + Address: "\x20\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + PrefixLen: 16, + }.Subnet(), + label: 2, + }, + // fc00::/7 (Unique local addresses as per RFC 4193 section 3.1). + { + subnet: tcpip.AddressWithPrefix{ + Address: "\xfc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + PrefixLen: 7, + }.Subnet(), + label: 13, + }, + // ::/0 + { + subnet: header.IPv6EmptySubnet, + label: 1, + }, +} + +func getLabel(addr tcpip.Address) uint8 { + for _, p := range policyTable { + if p.subnet.Contains(addr) { + return p.label + } + } + + panic(fmt.Sprintf("should have a label for address = %s", addr)) +} + +var _ stack.DuplicateAddressDetector = (*endpoint)(nil) +var _ stack.LinkAddressResolver = (*endpoint)(nil) +var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) +var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil) +var _ stack.GroupAddressableEndpoint = (*endpoint)(nil) +var _ stack.AddressableEndpoint = (*endpoint)(nil) +var _ stack.NetworkEndpoint = (*endpoint)(nil) +var _ stack.NDPEndpoint = (*endpoint)(nil) +var _ NDPEndpoint = (*endpoint)(nil) + +type endpoint struct { + nic stack.NetworkInterface + dispatcher stack.TransportDispatcher + protocol *protocol + stats sharedStats + + // enabled is set to 1 when the endpoint is enabled and 0 when it is + // disabled. + // + // Must be accessed using atomic operations. + enabled uint32 + + // forwarding is set to forwardingEnabled when the endpoint has forwarding + // enabled and forwardingDisabled when it is disabled. + // + // Must be accessed using atomic operations. + forwarding uint32 + + mu struct { + sync.RWMutex + + addressableEndpointState stack.AddressableEndpointState + ndp ndpState + mld mldState + } + + // dad is used to check if an arbitrary address is already assigned to some + // neighbor. + // + // Note: this is different from mu.ndp.dad which is used to perform DAD for + // addresses that are assigned to the interface. Removing an address aborts + // DAD; if we had used the same state, handlers for a removed address would + // not be called with the actual DAD result. + // + // LOCK ORDERING: mu > dad.mu. + dad struct { + mu struct { + sync.Mutex + + dad ip.DAD + } + } +} + +// NICNameFromID is a function that returns a stable name for the specified NIC, +// even if different NIC IDs are used to refer to the same NIC in different +// program runs. It is used when generating opaque interface identifiers (IIDs). +// If the NIC was created with a name, it is passed to NICNameFromID. +// +// NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are +// generated for the same prefix on different NICs. +type NICNameFromID func(tcpip.NICID, string) string + +// OpaqueInterfaceIdentifierOptions holds the options related to the generation +// of opaque interface identifiers (IIDs) as defined by RFC 7217. +type OpaqueInterfaceIdentifierOptions struct { + // NICNameFromID is a function that returns a stable name for a specified NIC, + // even if the NIC ID changes over time. + // + // Must be specified to generate the opaque IID. + NICNameFromID NICNameFromID + + // SecretKey is a pseudo-random number used as the secret key when generating + // opaque IIDs as defined by RFC 7217. The key SHOULD be at least + // header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness + // requirements for security as outlined by RFC 4086. SecretKey MUST NOT + // change between program runs, unless explicitly changed. + // + // OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey + // MUST NOT be modified after Stack is created. + // + // May be nil, but a nil value is highly discouraged to maintain + // some level of randomness between nodes. + SecretKey []byte +} + +// CheckDuplicateAddress implements stack.DuplicateAddressDetector. +func (e *endpoint) CheckDuplicateAddress(addr tcpip.Address, h stack.DADCompletionHandler) stack.DADCheckAddressDisposition { + e.dad.mu.Lock() + defer e.dad.mu.Unlock() + return e.dad.mu.dad.CheckDuplicateAddressLocked(addr, h) +} + +// SetDADConfigurations implements stack.DuplicateAddressDetector. +func (e *endpoint) SetDADConfigurations(c stack.DADConfigurations) { + e.mu.Lock() + defer e.mu.Unlock() + e.dad.mu.Lock() + defer e.dad.mu.Unlock() + + e.mu.ndp.dad.SetConfigsLocked(c) + e.dad.mu.dad.SetConfigsLocked(c) +} + +// DuplicateAddressProtocol implements stack.DuplicateAddressDetector. +func (*endpoint) DuplicateAddressProtocol() tcpip.NetworkProtocolNumber { + return ProtocolNumber +} + +// HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint. +func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { + // If we are operating as a router, we should return an ICMP error to the + // original packet's sender. + if pkt.NetworkPacketInfo.IsForwardedPacket { + // TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP + // errors to local endpoints. + e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt) + e.stats.ip.Forwarding.Errors.Increment() + e.stats.ip.Forwarding.HostUnreachable.Increment() + return + } + // handleControl expects the entire offending packet to be in the packet + // buffer's data field. + pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()), + }) + pkt.NICID = e.nic.ID() + pkt.NetworkProtocolNumber = ProtocolNumber + e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt) +} + +// onAddressAssignedLocked handles an address being assigned. +// +// Precondition: e.mu must be exclusively locked. +func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) { + // As per RFC 2710 section 3, + // + // All MLD messages described in this document are sent with a link-local + // IPv6 Source Address, ... + // + // If we just completed DAD for a link-local address, then attempt to send any + // queued MLD reports. Note, we may have sent reports already for some of the + // groups before we had a valid link-local address to use as the source for + // the MLD messages, but that was only so that MLD snooping switches are aware + // of our membership to groups - routers would not have handled those reports. + // + // As per RFC 3590 section 4, + // + // MLD Report and Done messages are sent with a link-local address as + // the IPv6 source address, if a valid address is available on the + // interface. If a valid link-local address is not available (e.g., one + // has not been configured), the message is sent with the unspecified + // address (::) as the IPv6 source address. + // + // Once a valid link-local address is available, a node SHOULD generate + // new MLD Report messages for all multicast addresses joined on the + // interface. + // + // Routers receiving an MLD Report or Done message with the unspecified + // address as the IPv6 source address MUST silently discard the packet + // without taking any action on the packets contents. + // + // Snooping switches MUST manage multicast forwarding state based on MLD + // Report and Done messages sent with the unspecified address as the + // IPv6 source address. + if header.IsV6LinkLocalUnicastAddress(addr) { + e.mu.mld.sendQueuedReports() + } +} + +// InvalidateDefaultRouter implements stack.NDPEndpoint. +func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) { + e.mu.Lock() + defer e.mu.Unlock() + + // We represent default routers with a default (off-link) route through the + // router. + e.mu.ndp.invalidateOffLinkRoute(offLinkRoute{dest: header.IPv6EmptySubnet, router: rtr}) +} + +// SetNDPConfigurations implements NDPEndpoint. +func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) { + c.validate() + e.mu.Lock() + defer e.mu.Unlock() + e.mu.ndp.configs = c +} + +// hasTentativeAddr returns true if addr is tentative on e. +func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool { + e.mu.RLock() + addressEndpoint := e.getAddressRLocked(addr) + e.mu.RUnlock() + return addressEndpoint != nil && addressEndpoint.GetKind() == stack.PermanentTentative +} + +// dupTentativeAddrDetected attempts to inform e that a tentative addr is a +// duplicate on a link. +// +// dupTentativeAddrDetected removes the tentative address if it exists. If the +// address was generated via SLAAC, an attempt is made to generate a new +// address. +func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr tcpip.LinkAddress, nonce []byte) tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + addressEndpoint := e.getAddressRLocked(addr) + if addressEndpoint == nil { + return &tcpip.ErrBadAddress{} + } + + if addressEndpoint.GetKind() != stack.PermanentTentative { + return &tcpip.ErrInvalidEndpointState{} + } + + switch result := e.mu.ndp.dad.ExtendIfNonceEqualLocked(addr, nonce); result { + case ip.Extended: + // The nonce we got back was the same we sent so we know the message + // indicating a duplicate address was likely ours so do not consider + // the address duplicate here. + return nil + case ip.AlreadyExtended: + // See Extended. + // + // Our DAD message was looped back already. + return nil + case ip.NoDADStateFound: + panic(fmt.Sprintf("expected DAD state for tentative address %s", addr)) + case ip.NonceDisabled: + // If nonce is disabled then we have no way to know if the packet was + // looped-back so we have to assume it indicates a duplicate address. + fallthrough + case ip.NonceNotEqual: + // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an + // attempt will be made to generate a new address for it. + if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */, &stack.DADDupAddrDetected{HolderLinkAddress: holderLinkAddr}); err != nil { + return err + } + + prefix := addressEndpoint.Subnet() + + switch t := addressEndpoint.ConfigType(); t { + case stack.AddressConfigStatic: + case stack.AddressConfigSlaac: + e.mu.ndp.regenerateSLAACAddr(prefix) + case stack.AddressConfigSlaacTemp: + // Do not reset the generation attempts counter for the prefix as the + // temporary address is being regenerated in response to a DAD conflict. + e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */) + default: + panic(fmt.Sprintf("unrecognized address config type = %d", t)) + } + + return nil + default: + panic(fmt.Sprintf("unhandled result = %d", result)) + } +} + +// Forwarding implements stack.ForwardingNetworkEndpoint. +func (e *endpoint) Forwarding() bool { + return atomic.LoadUint32(&e.forwarding) == forwardingEnabled +} + +// setForwarding sets the forwarding status for the endpoint. +// +// Returns true if the forwarding status was updated. +func (e *endpoint) setForwarding(v bool) bool { + forwarding := uint32(forwardingDisabled) + if v { + forwarding = forwardingEnabled + } + + return atomic.SwapUint32(&e.forwarding, forwarding) != forwarding +} + +// SetForwarding implements stack.ForwardingNetworkEndpoint. +func (e *endpoint) SetForwarding(forwarding bool) { + e.mu.Lock() + defer e.mu.Unlock() + + if !e.setForwarding(forwarding) { + return + } + + allRoutersGroups := [...]tcpip.Address{ + header.IPv6AllRoutersInterfaceLocalMulticastAddress, + header.IPv6AllRoutersLinkLocalMulticastAddress, + header.IPv6AllRoutersSiteLocalMulticastAddress, + } + + if forwarding { + // As per RFC 4291 section 2.8: + // + // A router is required to recognize all addresses that a host is + // required to recognize, plus the following addresses as identifying + // itself: + // + // o The All-Routers multicast addresses defined in Section 2.7.1. + // + // As per RFC 4291 section 2.7.1, + // + // All Routers Addresses: FF01:0:0:0:0:0:0:2 + // FF02:0:0:0:0:0:0:2 + // FF05:0:0:0:0:0:0:2 + // + // The above multicast addresses identify the group of all IPv6 routers, + // within scope 1 (interface-local), 2 (link-local), or 5 (site-local). + for _, g := range allRoutersGroups { + if err := e.joinGroupLocked(g); err != nil { + // joinGroupLocked only returns an error if the group address is not a + // valid IPv6 multicast address. + panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err)) + } + } + } else { + for _, g := range allRoutersGroups { + switch err := e.leaveGroupLocked(g).(type) { + case nil: + case *tcpip.ErrBadLocalAddress: + // The endpoint may have already left the multicast group. + default: + panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err)) + } + } + } + + e.mu.ndp.forwardingChanged(forwarding) +} + +// Enable implements stack.NetworkEndpoint. +func (e *endpoint) Enable() tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + // If the NIC is not enabled, the endpoint can't do anything meaningful so + // don't enable the endpoint. + if !e.nic.Enabled() { + return &tcpip.ErrNotPermitted{} + } + + // If the endpoint is already enabled, there is nothing for it to do. + if !e.setEnabled(true) { + return nil + } + + // Groups may have been joined when the endpoint was disabled, or the + // endpoint may have left groups from the perspective of MLD when the + // endpoint was disabled. Either way, we need to let routers know to + // send us multicast traffic. + e.mu.mld.initializeAll() + + // Join the IPv6 All-Nodes Multicast group if the stack is configured to + // use IPv6. This is required to ensure that this node properly receives + // and responds to the various NDP messages that are destined to the + // all-nodes multicast address. An example is the Neighbor Advertisement + // when we perform Duplicate Address Detection, or Router Advertisement + // when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861 + // section 4.2 for more information. + // + // Also auto-generate an IPv6 link-local address based on the endpoint's + // link address if it is configured to do so. Note, each interface is + // required to have IPv6 link-local unicast address, as per RFC 4291 + // section 2.1. + + // Join the All-Nodes multicast group before starting DAD as responses to DAD + // (NDP NS) messages may be sent to the All-Nodes multicast group if the + // source address of the NDP NS is the unspecified address, as per RFC 4861 + // section 7.2.4. + if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil { + // joinGroupLocked only returns an error if the group address is not a valid + // IPv6 multicast address. + panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err)) + } + + // Perform DAD on the all the unicast IPv6 endpoints that are in the permanent + // state. + // + // Addresses may have already completed DAD but in the time since the endpoint + // was last enabled, other devices may have acquired the same addresses. + var err tcpip.Error + e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { + addr := addressEndpoint.AddressWithPrefix().Address + if !header.IsV6UnicastAddress(addr) { + return true + } + + switch addressEndpoint.GetKind() { + case stack.Permanent: + addressEndpoint.SetKind(stack.PermanentTentative) + fallthrough + case stack.PermanentTentative: + err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint) + return err == nil + default: + return true + } + }) + if err != nil { + return err + } + + // Do not auto-generate an IPv6 link-local address for loopback devices. + if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() { + // The valid and preferred lifetime is infinite for the auto-generated + // link-local address. + e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime) + } + + e.mu.ndp.startSolicitingRouters() + return nil +} + +// Enabled implements stack.NetworkEndpoint. +func (e *endpoint) Enabled() bool { + return e.nic.Enabled() && e.isEnabled() +} + +// isEnabled returns true if the endpoint is enabled, regardless of the +// enabled status of the NIC. +func (e *endpoint) isEnabled() bool { + return atomic.LoadUint32(&e.enabled) == 1 +} + +// setEnabled sets the enabled status for the endpoint. +// +// Returns true if the enabled status was updated. +func (e *endpoint) setEnabled(v bool) bool { + if v { + return atomic.SwapUint32(&e.enabled, 1) == 0 + } + return atomic.SwapUint32(&e.enabled, 0) == 1 +} + +// Disable implements stack.NetworkEndpoint. +func (e *endpoint) Disable() { + e.mu.Lock() + defer e.mu.Unlock() + e.disableLocked() +} + +func (e *endpoint) disableLocked() { + if !e.Enabled() { + return + } + + e.mu.ndp.stopSolicitingRouters() + // Stop DAD for all the tentative unicast addresses. + e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { + if addressEndpoint.GetKind() != stack.PermanentTentative { + return true + } + + addr := addressEndpoint.AddressWithPrefix().Address + if header.IsV6UnicastAddress(addr) { + e.mu.ndp.stopDuplicateAddressDetection(addr, &stack.DADAborted{}) + } + + return true + }) + e.mu.ndp.cleanupState() + + // The endpoint may have already left the multicast group. + switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) { + case nil, *tcpip.ErrBadLocalAddress: + default: + panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err)) + } + + // Leave groups from the perspective of MLD so that routers know that + // we are no longer interested in the group. + e.mu.mld.softLeaveAll() + + if !e.setEnabled(false) { + panic("should have only done work to disable the endpoint if it was enabled") + } +} + +// DefaultTTL is the default hop limit for this endpoint. +func (e *endpoint) DefaultTTL() uint8 { + return e.protocol.DefaultTTL() +} + +// MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the +// network layer max header length. +func (e *endpoint) MTU() uint32 { + networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize) + if err != nil { + return 0 + } + return networkMTU +} + +// MaxHeaderLength returns the maximum length needed by ipv6 headers (and +// underlying protocols). +func (e *endpoint) MaxHeaderLength() uint16 { + // TODO(gvisor.dev/issues/5035): The maximum header length returned here does + // not open the possibility for the caller to know about size required for + // extension headers. + return e.nic.MaxHeaderLength() + header.IPv6MinimumSize +} + +func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error { + extHdrsLen := extensionHeaders.Length() + length := pkt.Size() + extensionHeaders.Length() + if length > math.MaxUint16 { + return &tcpip.ErrMessageTooLong{} + } + header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen)).Encode(&header.IPv6Fields{ + PayloadLength: uint16(length), + TransportProtocol: params.Protocol, + HopLimit: params.TTL, + TrafficClass: params.TOS, + SrcAddr: srcAddr, + DstAddr: dstAddr, + ExtensionHeaders: extensionHeaders, + }) + pkt.NetworkProtocolNumber = ProtocolNumber + return nil +} + +func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { + payload := pkt.TransportHeader().View().Size() + pkt.Data().Size() + return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU +} + +// handleFragments fragments pkt and calls the handler function on each +// fragment. It returns the number of fragments handled and the number of +// fragments left to be processed. The IP header must already be present in the +// original packet. The transport header protocol number is required to avoid +// parsing the IPv6 extension headers. +func (e *endpoint) handleFragments(r *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { + networkHeader := header.IPv6(pkt.NetworkHeader().View()) + + // TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are + // supported for outbound packets, their length should not affect the fragment + // maximum payload length because they should only be transmitted once. + fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7 + if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit { + // We need at least 8 bytes of space left for the fragmentable part because + // the fragment payload must obviously be non-zero and must be a multiple + // of 8 as per RFC 8200 section 4.5: + // Each complete fragment, except possibly the last ("rightmost") one, is + // an integer multiple of 8 octets long. + return 0, 1, &tcpip.ErrMessageTooLong{} + } + + if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) { + // As per RFC 8200 Section 4.5, the Transport Header is expected to be small + // enough to fit in the first fragment. + return 0, 1, &tcpip.ErrMessageTooLong{} + } + + pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt)) + id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1) + + var n int + for { + fragPkt, more := buildNextFragment(&pf, networkHeader, transProto, id) + if err := handler(fragPkt); err != nil { + return n, pf.RemainingFragmentCount() + 1, err + } + n++ + if !more { + return n, pf.RemainingFragmentCount(), nil + } + } +} + +// WritePacket writes a packet to the given destination address and protocol. +func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { + if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* extensionHeaders */); err != nil { + return err + } + + // iptables filtering. All packets that reach here are locally + // generated. + outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) + if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { + // iptables is telling us to drop the packet. + e.stats.ip.IPTablesOutputDropped.Increment() + return nil + } + + // If the packet is manipulated as per NAT Output rules, handle packet + // based on destination address and do not send the packet to link + // layer. + // + // We should do this for every packet, rather than only NATted packets, but + // removing this check short circuits broadcasts before they are sent out to + // other hosts. + if pkt.NatDone { + netHeader := header.IPv6(pkt.NetworkHeader().View()) + if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil { + // Since we rewrote the packet but it is being routed back to us, we + // can safely assume the checksum is valid. + ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) + return nil + } + } + + return e.writePacket(r, pkt, params.Protocol, false /* headerIncluded */) +} + +func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error { + if r.Loop()&stack.PacketLoop != 0 { + // If the packet was generated by the stack (not a raw/packet endpoint + // where a packet may be written with the header included), then we can + // safely assume the checksum is valid. + e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) + } + if r.Loop()&stack.PacketOut == 0 { + return nil + } + + // Postrouting NAT can only change the source address, and does not alter the + // route or outgoing interface of the packet. + outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) + if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, outNicName); !ok { + // iptables is telling us to drop the packet. + e.stats.ip.IPTablesPostroutingDropped.Increment() + return nil + } + + stats := e.stats.ip + networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size())) + if err != nil { + stats.OutgoingPacketErrors.Increment() + return err + } + + if packetMustBeFragmented(pkt, networkMTU) { + if pkt.NetworkPacketInfo.IsForwardedPacket { + // As per RFC 2460, section 4.5: + // Unlike IPv4, fragmentation in IPv6 is performed only by source nodes, + // not by routers along a packet's delivery path. + return &tcpip.ErrMessageTooLong{} + } + sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { + // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each + // fragment one by one using WritePacket() (current strategy) or if we + // want to create a PacketBufferList from the fragments and feed it to + // WritePackets(). It'll be faster but cost more memory. + return e.nic.WritePacket(r, ProtocolNumber, fragPkt) + }) + stats.PacketsSent.IncrementBy(uint64(sent)) + stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) + return err + } + + if err := e.nic.WritePacket(r, ProtocolNumber, pkt); err != nil { + stats.OutgoingPacketErrors.Increment() + return err + } + + stats.PacketsSent.Increment() + return nil +} + +// WritePackets implements stack.NetworkEndpoint. +func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) { + if r.Loop()&stack.PacketLoop != 0 { + panic("not implemented") + } + if r.Loop()&stack.PacketOut == 0 { + return pkts.Len(), nil + } + + stats := e.stats.ip + linkMTU := e.nic.MTU() + for pb := pkts.Front(); pb != nil; pb = pb.Next() { + if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pb, params, nil /* extensionHeaders */); err != nil { + return 0, err + } + + networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size())) + if err != nil { + stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len())) + return 0, err + } + if packetMustBeFragmented(pb, networkMTU) { + // Keep track of the packet that is about to be fragmented so it can be + // removed once the fragmentation is done. + originalPkt := pb + if _, _, err := e.handleFragments(r, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { + // Modify the packet list in place with the new fragments. + pkts.InsertAfter(pb, fragPkt) + pb = fragPkt + return nil + }); err != nil { + stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len())) + return 0, err + } + // Remove the packet that was just fragmented and process the rest. + pkts.Remove(originalPkt) + } + } + + // iptables filtering. All packets that reach here are locally + // generated. + outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) + outputDropped, natPkts := e.protocol.stack.IPTables().CheckOutputPackets(pkts, r, outNicName) + stats.IPTablesOutputDropped.IncrementBy(uint64(len(outputDropped))) + for pkt := range outputDropped { + pkts.Remove(pkt) + } + + // The NAT-ed packets may now be destined for us. + locallyDelivered := 0 + for pkt := range natPkts { + ep := e.protocol.findEndpointWithAddress(header.IPv6(pkt.NetworkHeader().View()).DestinationAddress()) + if ep == nil { + // The NAT-ed packet is still destined for some remote node. + continue + } + + // Do not send the locally destined packet out the NIC. + pkts.Remove(pkt) + + // Deliver the packet locally. + ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) + locallyDelivered++ + } + + // We ignore the list of NAT-ed packets here because Postrouting NAT can only + // change the source address, and does not alter the route or outgoing + // interface of the packet. + postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, outNicName) + stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped))) + for pkt := range postroutingDropped { + pkts.Remove(pkt) + } + + // The rest of the packets can be delivered to the NIC as a batch. + pktsLen := pkts.Len() + written, err := e.nic.WritePackets(r, pkts, ProtocolNumber) + stats.PacketsSent.IncrementBy(uint64(written)) + stats.OutgoingPacketErrors.IncrementBy(uint64(pktsLen - written)) + + // Dropped packets aren't errors, so include them in the return value. + return locallyDelivered + written + len(outputDropped) + len(postroutingDropped), err +} + +// WriteHeaderIncludedPacket implements stack.NetworkEndpoint. +func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { + // The packet already has an IP header, but there are a few required checks. + h, ok := pkt.Data().PullUp(header.IPv6MinimumSize) + if !ok { + return &tcpip.ErrMalformedHeader{} + } + ipH := header.IPv6(h) + + // Always set the payload length. + pktSize := pkt.Data().Size() + ipH.SetPayloadLength(uint16(pktSize - header.IPv6MinimumSize)) + + // Set the source address when zero. + if ipH.SourceAddress() == header.IPv6Any { + ipH.SetSourceAddress(r.LocalAddress()) + } + + // Populate the packet buffer's network header and don't allow an invalid + // packet to be sent. + // + // Note that parsing only makes sure that the packet is well formed as per the + // wire format. We also want to check if the header's fields are valid before + // sending the packet. + proto, _, _, _, ok := parse.IPv6(pkt) + if !ok || !header.IPv6(pkt.NetworkHeader().View()).IsValid(pktSize) { + return &tcpip.ErrMalformedHeader{} + } + + return e.writePacket(r, pkt, proto, true /* headerIncluded */) +} + +// forwardPacket attempts to forward a packet to its final destination. +func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { + h := header.IPv6(pkt.NetworkHeader().View()) + + dstAddr := h.DestinationAddress() + // As per RFC 4291 section 2.5.6, + // + // Routers must not forward any packets with Link-Local source or + // destination addresses to other links. + if header.IsV6LinkLocalUnicastAddress(h.SourceAddress()) { + return &ip.ErrLinkLocalSourceAddress{} + } + if header.IsV6LinkLocalUnicastAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) { + return &ip.ErrLinkLocalDestinationAddress{} + } + + hopLimit := h.HopLimit() + if hopLimit <= 1 { + // As per RFC 4443 section 3.3, + // + // If a router receives a packet with a Hop Limit of zero, or if a + // router decrements a packet's Hop Limit to zero, it MUST discard the + // packet and originate an ICMPv6 Time Exceeded message with Code 0 to + // the source of the packet. This indicates either a routing loop or + // too small an initial Hop Limit value. + // + // We return the original error rather than the result of returning + // the ICMP packet because the original error is more relevant to + // the caller. + _ = e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt) + return &ip.ErrTTLExceeded{} + } + + stk := e.protocol.stack + + // Check if the destination is owned by the stack. + if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { + inNicName := stk.FindNICNameFromID(e.nic.ID()) + outNicName := stk.FindNICNameFromID(ep.nic.ID()) + if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { + // iptables is telling us to drop the packet. + e.stats.ip.IPTablesForwardDropped.Increment() + return nil + } + + // The packet originally arrived on e so provide its NIC as the input NIC. + ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) + return nil + } + + // Check extension headers for any errors requiring action during forwarding. + if err := e.processExtensionHeaders(h, pkt, true /* forwarding */); err != nil { + return &ip.ErrParameterProblem{} + } + + r, err := stk.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */) + switch err.(type) { + case nil: + case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable: + // We return the original error rather than the result of returning the + // ICMP packet because the original error is more relevant to the caller. + _ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt) + return &ip.ErrNoRoute{} + default: + return &ip.ErrOther{Err: err} + } + defer r.Release() + + inNicName := stk.FindNICNameFromID(e.nic.ID()) + outNicName := stk.FindNICNameFromID(r.NICID()) + if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { + // iptables is telling us to drop the packet. + e.stats.ip.IPTablesForwardDropped.Increment() + return nil + } + + // We need to do a deep copy of the IP packet because + // WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do + // not own it. + newPkt := pkt.DeepCopyForForwarding(int(r.MaxHeaderLength())) + newHdr := header.IPv6(newPkt.NetworkHeader().View()) + + // As per RFC 8200 section 3, + // + // Hop Limit 8-bit unsigned integer. Decremented by 1 by + // each node that forwards the packet. + newHdr.SetHopLimit(hopLimit - 1) + + forwardToEp, ok := e.protocol.getEndpointForNIC(r.NICID()) + if !ok { + // The interface was removed after we obtained the route. + return &ip.ErrOther{Err: &tcpip.ErrUnknownDevice{}} + } + + switch err := forwardToEp.writePacket(r, newPkt, newPkt.TransportProtocolNumber, true /* headerIncluded */); err.(type) { + case nil: + return nil + case *tcpip.ErrMessageTooLong: + // As per RFC 4443, section 3.2: + // A Packet Too Big MUST be sent by a router in response to a packet that + // it cannot forward because the packet is larger than the MTU of the + // outgoing link. + _ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt) + return &ip.ErrMessageTooLong{} + default: + return &ip.ErrOther{Err: err} + } +} + +// HandlePacket is called by the link layer when new ipv6 packets arrive for +// this endpoint. +func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { + stats := e.stats.ip + + stats.PacketsReceived.Increment() + + if !e.isEnabled() { + stats.DisabledPacketsReceived.Increment() + return + } + + h, ok := e.protocol.parseAndValidate(pkt) + if !ok { + stats.MalformedPacketsReceived.Increment() + return + } + + if !e.nic.IsLoopback() { + if !e.protocol.options.AllowExternalLoopbackTraffic { + if header.IsV6LoopbackAddress(h.SourceAddress()) { + stats.InvalidSourceAddressesReceived.Increment() + return + } + + if header.IsV6LoopbackAddress(h.DestinationAddress()) { + stats.InvalidDestinationAddressesReceived.Increment() + return + } + } + + if e.protocol.stack.HandleLocal() { + addressEndpoint := e.AcquireAssignedAddress(header.IPv6(pkt.NetworkHeader().View()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint) + if addressEndpoint != nil { + addressEndpoint.DecRef() + + // The source address is one of our own, so we never should have gotten + // a packet like this unless HandleLocal is false or our NIC is the + // loopback interface. + stats.InvalidSourceAddressesReceived.Increment() + return + } + } + + // Loopback traffic skips the prerouting chain. + inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) + if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { + // iptables is telling us to drop the packet. + stats.IPTablesPreroutingDropped.Increment() + return + } + } + + e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) +} + +// handleLocalPacket is like HandlePacket except it does not perform the +// prerouting iptables hook or check for loopback traffic that originated from +// outside of the netstack (i.e. martian loopback packets). +func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) { + stats := e.stats.ip + stats.PacketsReceived.Increment() + + pkt = pkt.CloneToInbound() + pkt.RXTransportChecksumValidated = canSkipRXChecksum + + h, ok := e.protocol.parseAndValidate(pkt) + if !ok { + stats.MalformedPacketsReceived.Increment() + return + } + + e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) +} + +func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer, inNICName string) { + pkt.NICID = e.nic.ID() + + // Raw socket packets are delivered based solely on the transport protocol + // number. We only require that the packet be valid IPv6. + e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) + + stats := e.stats.ip + stats.ValidPacketsReceived.Increment() + + srcAddr := h.SourceAddress() + dstAddr := h.DestinationAddress() + + // As per RFC 4291 section 2.7: + // Multicast addresses must not be used as source addresses in IPv6 + // packets or appear in any Routing header. + if header.IsV6MulticastAddress(srcAddr) { + stats.InvalidSourceAddressesReceived.Increment() + return + } + + // The destination address should be an address we own or a group we joined + // for us to receive the packet. Otherwise, attempt to forward the packet. + if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil { + addressEndpoint.DecRef() + } else if !e.IsInGroup(dstAddr) { + if !e.Forwarding() { + stats.InvalidDestinationAddressesReceived.Increment() + return + } + switch err := e.forwardPacket(pkt); err.(type) { + case nil: + return + case *ip.ErrLinkLocalSourceAddress: + e.stats.ip.Forwarding.LinkLocalSource.Increment() + case *ip.ErrLinkLocalDestinationAddress: + e.stats.ip.Forwarding.LinkLocalDestination.Increment() + case *ip.ErrTTLExceeded: + e.stats.ip.Forwarding.ExhaustedTTL.Increment() + case *ip.ErrNoRoute: + e.stats.ip.Forwarding.Unrouteable.Increment() + case *ip.ErrParameterProblem: + e.stats.ip.Forwarding.ExtensionHeaderProblem.Increment() + case *ip.ErrMessageTooLong: + e.stats.ip.Forwarding.PacketTooBig.Increment() + default: + panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) + } + e.stats.ip.Forwarding.Errors.Increment() + return + } + + // iptables filtering. All packets that reach here are intended for + // this machine and need not be forwarded. + if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok { + // iptables is telling us to drop the packet. + stats.IPTablesInputDropped.Increment() + return + } + + // Any returned error is only useful for terminating execution early, but + // we have nothing left to do, so we can drop it. + _ = e.processExtensionHeaders(h, pkt, false /* forwarding */) +} + +// processExtensionHeaders processes the extension headers in the given packet. +// Returns an error if the processing of a header failed or if the packet should +// be discarded. +func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffer, forwarding bool) error { + stats := e.stats.ip + srcAddr := h.SourceAddress() + dstAddr := h.DestinationAddress() + + // Create a VV to parse the packet. We don't plan to modify anything here. + // vv consists of: + // - Any IPv6 header bytes after the first 40 (i.e. extensions). + // - The transport header, if present. + // - Any other payload data. + vv := pkt.NetworkHeader().View()[header.IPv6MinimumSize:].ToVectorisedView() + vv.AppendView(pkt.TransportHeader().View()) + vv.AppendViews(pkt.Data().Views()) + it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), vv) + + var ( + hasFragmentHeader bool + routerAlert *header.IPv6RouterAlertOption + ) + + for { + // Keep track of the start of the previous header so we can report the + // special case of a Hop by Hop at a location other than at the start. + previousHeaderStart := it.HeaderOffset() + extHdr, done, err := it.Next() + if err != nil { + stats.MalformedPacketsReceived.Increment() + return err + } + if done { + break + } + + // As per RFC 8200, section 4: + // + // Extension headers (except for the Hop-by-Hop Options header) are + // not processed, inserted, or deleted by any node along a packet's + // delivery path until the packet reaches the node identified in the + // Destination Address field of the IPv6 header. + // + // Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension + // header is restricted to appear first in the list of extension headers. + // + // Therefore, we can immediately return once we hit any header other + // than the Hop-by-Hop header while forwarding a packet. + if forwarding { + if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok { + return nil + } + } + + switch extHdr := extHdr.(type) { + case header.IPv6HopByHopOptionsExtHdr: + // As per RFC 8200 section 4.1, the Hop By Hop extension header is + // restricted to appear immediately after an IPv6 fixed header. + if previousHeaderStart != 0 { + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6UnknownHeader, + pointer: previousHeaderStart, + forwarding: forwarding, + }, pkt) + return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart) + } + + optsIt := extHdr.Iter() + + for { + opt, done, err := optsIt.Next() + if err != nil { + stats.MalformedPacketsReceived.Increment() + return err + } + if done { + break + } + + switch opt := opt.(type) { + case *header.IPv6RouterAlertOption: + if routerAlert != nil { + // As per RFC 2711 section 3, there should be at most one Router + // Alert option per packet. + // + // There MUST only be one option of this type, regardless of + // value, per Hop-by-Hop header. + stats.MalformedPacketsReceived.Increment() + return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, routerAlert) + } + routerAlert = opt + stats.OptionRouterAlertReceived.Increment() + default: + switch opt.UnknownAction() { + case header.IPv6OptionUnknownActionSkip: + case header.IPv6OptionUnknownActionDiscard: + return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt) + case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest: + if header.IsV6MulticastAddress(dstAddr) { + return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt) + } + fallthrough + case header.IPv6OptionUnknownActionDiscardSendICMP: + // This case satisfies a requirement of RFC 8200 section 4.2 which + // states that an unknown option starting with bits [10] should: + // + // discard the packet and, regardless of whether or not the + // packet's Destination Address was a multicast address, send an + // ICMP Parameter Problem, Code 2, message to the packet's + // Source Address, pointing to the unrecognized Option Type. + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6UnknownOption, + pointer: it.ParseOffset() + optsIt.OptionOffset(), + respondToMulticast: true, + forwarding: forwarding, + }, pkt) + return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt) + default: + panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt)) + } + } + } + + case header.IPv6RoutingExtHdr: + // As per RFC 8200 section 4.4, if a node encounters a routing header with + // an unrecognized routing type value, with a non-zero Segments Left + // value, the node must discard the packet and send an ICMP Parameter + // Problem, Code 0 to the packet's Source Address, pointing to the + // unrecognized Routing Type. + // + // If the Segments Left is 0, the node must ignore the Routing extension + // header and process the next header in the packet. + // + // Note, the stack does not yet handle any type of routing extension + // header, so we just make sure Segments Left is zero before processing + // the next extension header. + if extHdr.SegmentsLeft() != 0 { + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6ErroneousHeader, + pointer: it.ParseOffset(), + // For the sake of consistency, we're using the value of `forwarding` + // here, even though it should always be false if we've reached this + // point. If `forwarding` is true here, we're executing undefined + // behavior no matter what. + forwarding: forwarding, + }, pkt) + return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr) + } + + case header.IPv6FragmentExtHdr: + hasFragmentHeader = true + + if extHdr.IsAtomic() { + // This fragment extension header indicates that this packet is an + // atomic fragment. An atomic fragment is a fragment that contains + // all the data required to reassemble a full packet. As per RFC 6946, + // atomic fragments must not interfere with "normal" fragmented traffic + // so we skip processing the fragment instead of feeding it through the + // reassembly process below. + continue + } + + fragmentFieldOffset := it.ParseOffset() + + // Don't consume the iterator if we have the first fragment because we + // will use it to validate that the first fragment holds the upper layer + // header. + rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */) + + if extHdr.FragmentOffset() == 0 { + // Check that the iterator ends with a raw payload as the first fragment + // should include all headers up to and including any upper layer + // headers, as per RFC 8200 section 4.5; only upper layer data + // (non-headers) should follow the fragment extension header. + var lastHdr header.IPv6PayloadHeader + + for { + it, done, err := it.Next() + if err != nil { + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + return err + } + if done { + break + } + + lastHdr = it + } + + // If the last header is a raw header, then the last portion of the IPv6 + // payload is not a known IPv6 extension header. Note, this does not + // mean that the last portion is an upper layer header or not an + // extension header because: + // 1) we do not yet support all extension headers + // 2) we do not validate the upper layer header before reassembling. + // + // This check makes sure that a known IPv6 extension header is not + // present after the Fragment extension header in a non-initial + // fragment. + // + // TODO(#2196): Support IPv6 Authentication and Encapsulated + // Security Payload extension headers. + // TODO(#2333): Validate that the upper layer header is valid. + switch lastHdr.(type) { + case header.IPv6RawPayloadHeader: + default: + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr) + } + } + + fragmentPayloadLen := rawPayload.Buf.Size() + if fragmentPayloadLen == 0 { + // Drop the packet as it's marked as a fragment but has no payload. + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + return fmt.Errorf("fragment has no payload") + } + + // As per RFC 2460 Section 4.5: + // + // If the length of a fragment, as derived from the fragment packet's + // Payload Length field, is not a multiple of 8 octets and the M flag + // of that fragment is 1, then that fragment must be discarded and an + // ICMP Parameter Problem, Code 0, message should be sent to the source + // of the fragment, pointing to the Payload Length field of the + // fragment packet. + if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 { + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6ErroneousHeader, + pointer: header.IPv6PayloadLenOffset, + }, pkt) + return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen) + } + + // The packet is a fragment, let's try to reassemble it. + start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit + + // As per RFC 2460 Section 4.5: + // + // If the length and offset of a fragment are such that the Payload + // Length of the packet reassembled from that fragment would exceed + // 65,535 octets, then that fragment must be discarded and an ICMP + // Parameter Problem, Code 0, message should be sent to the source of + // the fragment, pointing to the Fragment Offset field of the fragment + // packet. + lengthAfterReassembly := int(start) + fragmentPayloadLen + if lengthAfterReassembly > header.IPv6MaximumPayloadSize { + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6ErroneousHeader, + pointer: fragmentFieldOffset, + }, pkt) + return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize) + } + + // Note that pkt doesn't have its transport header set after reassembly, + // and won't until DeliverNetworkPacket sets it. + resPkt, proto, ready, err := e.protocol.fragmentation.Process( + // IPv6 ignores the Protocol field since the ID only needs to be unique + // across source-destination pairs, as per RFC 8200 section 4.5. + fragmentation.FragmentID{ + Source: srcAddr, + Destination: dstAddr, + ID: extHdr.ID(), + }, + start, + start+uint16(fragmentPayloadLen)-1, + extHdr.More(), + uint8(rawPayload.Identifier), + pkt, + ) + if err != nil { + stats.MalformedPacketsReceived.Increment() + stats.MalformedFragmentsReceived.Increment() + return err + } + + if ready { + pkt = resPkt + + // We create a new iterator with the reassembled packet because we could + // have more extension headers in the reassembled payload, as per RFC + // 8200 section 4.5. We also use the NextHeader value from the first + // fragment. + data := pkt.Data() + dataVV := buffer.NewVectorisedView(data.Size(), data.Views()) + it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), dataVV) + } + + case header.IPv6DestinationOptionsExtHdr: + optsIt := extHdr.Iter() + + for { + opt, done, err := optsIt.Next() + if err != nil { + stats.MalformedPacketsReceived.Increment() + return err + } + if done { + break + } + + // We currently do not support any IPv6 Destination extension header + // options. + switch opt.UnknownAction() { + case header.IPv6OptionUnknownActionSkip: + case header.IPv6OptionUnknownActionDiscard: + return fmt.Errorf("found unknown destination header option = %#v with discard action", opt) + case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest: + if header.IsV6MulticastAddress(dstAddr) { + return fmt.Errorf("found unknown destination header option %#v with discard action", opt) + } + fallthrough + case header.IPv6OptionUnknownActionDiscardSendICMP: + // This case satisfies a requirement of RFC 8200 section 4.2 + // which states that an unknown option starting with bits [10] should: + // + // discard the packet and, regardless of whether or not the + // packet's Destination Address was a multicast address, send an + // ICMP Parameter Problem, Code 2, message to the packet's + // Source Address, pointing to the unrecognized Option Type. + // + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6UnknownOption, + pointer: it.ParseOffset() + optsIt.OptionOffset(), + respondToMulticast: true, + }, pkt) + return fmt.Errorf("found unknown destination header option %#v with discard action", opt) + default: + panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt)) + } + } + + case header.IPv6RawPayloadHeader: + // If the last header in the payload isn't a known IPv6 extension header, + // handle it as if it is transport layer data. + + // Calculate the number of octets parsed from data. We want to remove all + // the data except the unparsed portion located at the end, which its size + // is extHdr.Buf.Size(). + trim := pkt.Data().Size() - extHdr.Buf.Size() + + // For unfragmented packets, extHdr still contains the transport header. + // Get rid of it. + // + // For reassembled fragments, pkt.TransportHeader is unset, so this is a + // no-op and pkt.Data begins with the transport header. + trim += pkt.TransportHeader().View().Size() + + pkt.Data().DeleteFront(trim) + + stats.PacketsDelivered.Increment() + if p := tcpip.TransportProtocolNumber(extHdr.Identifier); p == header.ICMPv6ProtocolNumber { + pkt.TransportProtocolNumber = p + e.handleICMP(pkt, hasFragmentHeader, routerAlert) + } else { + stats.PacketsDelivered.Increment() + switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res { + case stack.TransportPacketHandled: + case stack.TransportPacketDestinationPortUnreachable: + // As per RFC 4443 section 3.1: + // A destination node SHOULD originate a Destination Unreachable + // message with Code 4 in response to a packet for which the + // transport protocol (e.g., UDP) has no listener, if that transport + // protocol has no alternative means to inform the sender. + _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt) + return fmt.Errorf("destination port unreachable") + case stack.TransportPacketProtocolUnreachable: + // As per RFC 8200 section 4. (page 7): + // Extension headers are numbered from IANA IP Protocol Numbers + // [IANA-PN], the same values used for IPv4 and IPv6. When + // processing a sequence of Next Header values in a packet, the + // first one that is not an extension header [IANA-EH] indicates + // that the next item in the packet is the corresponding upper-layer + // header. + // With more related information on page 8: + // If, as a result of processing a header, the destination node is + // required to proceed to the next header but the Next Header value + // in the current header is unrecognized by the node, it should + // discard the packet and send an ICMP Parameter Problem message to + // the source of the packet, with an ICMP Code value of 1 + // ("unrecognized Next Header type encountered") and the ICMP + // Pointer field containing the offset of the unrecognized value + // within the original packet. + // + // Which when taken together indicate that an unknown protocol should + // be treated as an unrecognized next header value. + // The location of the Next Header field is in a different place in + // the initial IPv6 header than it is in the extension headers so + // treat it specially. + prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset) + if previousHeaderStart != 0 { + prevHdrIDOffset = previousHeaderStart + } + _ = e.protocol.returnError(&icmpReasonParameterProblem{ + code: header.ICMPv6UnknownHeader, + pointer: prevHdrIDOffset, + }, pkt) + return fmt.Errorf("transport protocol unreachable") + default: + panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) + } + } + + default: + // Since the iterator returns IPv6RawPayloadHeader for unknown Extension + // Header IDs this should never happen unless we missed a supported type + // here. + panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr)) + + } + } + return nil +} + +// Close cleans up resources associated with the endpoint. +func (e *endpoint) Close() { + e.mu.Lock() + e.disableLocked() + e.mu.addressableEndpointState.Cleanup() + e.mu.Unlock() + + e.protocol.forgetEndpoint(e.nic.ID()) +} + +// NetworkProtocolNumber implements stack.NetworkEndpoint. +func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { + return e.protocol.Number() +} + +// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. +func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { + // TODO(b/169350103): add checks here after making sure we no longer receive + // an empty address. + e.mu.Lock() + defer e.mu.Unlock() + return e.addAndAcquirePermanentAddressLocked(addr, properties) +} + +// addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but +// with locking requirements. +// +// addAndAcquirePermanentAddressLocked also joins the passed address's +// solicited-node multicast group and start duplicate address detection. +// +// Precondition: e.mu must be write locked. +func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { + addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, properties) + if err != nil { + return nil, err + } + + if !header.IsV6UnicastAddress(addr.Address) { + return addressEndpoint, nil + } + + addressEndpoint.SetKind(stack.PermanentTentative) + + if e.Enabled() { + if err := e.mu.ndp.startDuplicateAddressDetection(addr.Address, addressEndpoint); err != nil { + return nil, err + } + } + + snmc := header.SolicitedNodeAddr(addr.Address) + if err := e.joinGroupLocked(snmc); err != nil { + // joinGroupLocked only returns an error if the group address is not a valid + // IPv6 multicast address. + panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", snmc, err)) + } + + return addressEndpoint, nil +} + +// RemovePermanentAddress implements stack.AddressableEndpoint. +func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + addressEndpoint := e.getAddressRLocked(addr) + if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() { + return &tcpip.ErrBadLocalAddress{} + } + + return e.removePermanentEndpointLocked(addressEndpoint, true /* allowSLAACInvalidation */, &stack.DADAborted{}) +} + +// removePermanentEndpointLocked is like removePermanentAddressLocked except +// it works with a stack.AddressEndpoint. +// +// Precondition: e.mu must be write locked. +func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool, dadResult stack.DADResult) tcpip.Error { + addr := addressEndpoint.AddressWithPrefix() + // If we are removing an address generated via SLAAC, cleanup + // its SLAAC resources and notify the integrator. + switch addressEndpoint.ConfigType() { + case stack.AddressConfigSlaac: + e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation) + case stack.AddressConfigSlaacTemp: + e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr) + } + + return e.removePermanentEndpointInnerLocked(addressEndpoint, dadResult) +} + +// removePermanentEndpointInnerLocked is like removePermanentEndpointLocked +// except it does not cleanup SLAAC address state. +// +// Precondition: e.mu must be write locked. +func (e *endpoint) removePermanentEndpointInnerLocked(addressEndpoint stack.AddressEndpoint, dadResult stack.DADResult) tcpip.Error { + addr := addressEndpoint.AddressWithPrefix() + e.mu.ndp.stopDuplicateAddressDetection(addr.Address, dadResult) + + if err := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint); err != nil { + return err + } + + snmc := header.SolicitedNodeAddr(addr.Address) + err := e.leaveGroupLocked(snmc) + // The endpoint may have already left the multicast group. + if _, ok := err.(*tcpip.ErrBadLocalAddress); ok { + err = nil + } + return err +} + +// hasPermanentAddressLocked returns true if the endpoint has a permanent +// address equal to the passed address. +// +// Precondition: e.mu must be read or write locked. +func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool { + addressEndpoint := e.getAddressRLocked(addr) + if addressEndpoint == nil { + return false + } + return addressEndpoint.GetKind().IsPermanent() +} + +// getAddressRLocked returns the endpoint for the passed address. +// +// Precondition: e.mu must be read or write locked. +func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint { + return e.mu.addressableEndpointState.GetAddress(localAddr) +} + +// MainAddress implements stack.AddressableEndpoint. +func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { + e.mu.RLock() + defer e.mu.RUnlock() + return e.mu.addressableEndpointState.MainAddress() +} + +// AcquireAssignedAddress implements stack.AddressableEndpoint. +func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { + e.mu.RLock() + defer e.mu.RUnlock() + return e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB) +} + +// acquireAddressOrCreateTempLocked is like AcquireAssignedAddress but with +// locking requirements. +// +// Precondition: e.mu must be write locked. +func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { + return e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB) +} + +// AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. +func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { + e.mu.RLock() + defer e.mu.RUnlock() + return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired) +} + +// getLinkLocalAddressRLocked returns a link-local address from the primary list +// of addresses, if one is available. +// +// See stack.PrimaryEndpointBehavior for more details about the primary list. +// +// Precondition: e.mu must be read locked. +func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address { + var linkLocalAddr tcpip.Address + e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { + if addressEndpoint.IsAssigned(false /* allowExpired */) { + if addr := addressEndpoint.AddressWithPrefix().Address; header.IsV6LinkLocalUnicastAddress(addr) { + linkLocalAddr = addr + return false + } + } + return true + }) + return linkLocalAddr +} + +// acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress +// but with locking requirements. +// +// Precondition: e.mu must be read locked. +func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { + // addrCandidate is a candidate for Source Address Selection, as per + // RFC 6724 section 5. + type addrCandidate struct { + addressEndpoint stack.AddressEndpoint + addr tcpip.Address + scope header.IPv6AddressScope + + label uint8 + matchingPrefix uint8 + } + + if len(remoteAddr) == 0 { + return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired) + } + + // Create a candidate set of available addresses we can potentially use as a + // source address. + var cs []addrCandidate + e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { + // If r is not valid for outgoing connections, it is not a valid endpoint. + if !addressEndpoint.IsAssigned(allowExpired) { + return true + } + + addr := addressEndpoint.AddressWithPrefix().Address + scope, err := header.ScopeForIPv6Address(addr) + if err != nil { + // Should never happen as we got r from the primary IPv6 endpoint list and + // ScopeForIPv6Address only returns an error if addr is not an IPv6 + // address. + panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err)) + } + + cs = append(cs, addrCandidate{ + addressEndpoint: addressEndpoint, + addr: addr, + scope: scope, + label: getLabel(addr), + matchingPrefix: remoteAddr.MatchingPrefix(addr), + }) + + return true + }) + + remoteScope, err := header.ScopeForIPv6Address(remoteAddr) + if err != nil { + // primaryIPv6Endpoint should never be called with an invalid IPv6 address. + panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err)) + } + + remoteLabel := getLabel(remoteAddr) + + // Sort the addresses as per RFC 6724 section 5 rules 1-3. + // + // TODO(b/146021396): Implement rules 4, 5 of RFC 6724 section 5. + sort.Slice(cs, func(i, j int) bool { + sa := cs[i] + sb := cs[j] + + // Prefer same address as per RFC 6724 section 5 rule 1. + if sa.addr == remoteAddr { + return true + } + if sb.addr == remoteAddr { + return false + } + + // Prefer appropriate scope as per RFC 6724 section 5 rule 2. + if sa.scope < sb.scope { + return sa.scope >= remoteScope + } else if sb.scope < sa.scope { + return sb.scope < remoteScope + } + + // Avoid deprecated addresses as per RFC 6724 section 5 rule 3. + if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep { + // If sa is not deprecated, it is preferred over sb. + return sbDep + } + + // Prefer matching label as per RFC 6724 section 5 rule 6. + if sa, sb := sa.label == remoteLabel, sb.label == remoteLabel; sa != sb { + if sa { + return true + } + if sb { + return false + } + } + + // Prefer temporary addresses as per RFC 6724 section 5 rule 7. + if saTemp, sbTemp := sa.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp, sb.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp; saTemp != sbTemp { + return saTemp + } + + // Use longest matching prefix as per RFC 6724 section 5 rule 8. + if sa.matchingPrefix > sb.matchingPrefix { + return true + } + if sb.matchingPrefix > sa.matchingPrefix { + return false + } + + // sa and sb are equal, return the endpoint that is closest to the front of + // the primary endpoint list. + return i < j + }) + + // Return the most preferred address that can have its reference count + // incremented. + for _, c := range cs { + if c.addressEndpoint.IncRef() { + return c.addressEndpoint + } + } + + return nil +} + +// PrimaryAddresses implements stack.AddressableEndpoint. +func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { + e.mu.RLock() + defer e.mu.RUnlock() + return e.mu.addressableEndpointState.PrimaryAddresses() +} + +// PermanentAddresses implements stack.AddressableEndpoint. +func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { + e.mu.RLock() + defer e.mu.RUnlock() + return e.mu.addressableEndpointState.PermanentAddresses() +} + +// JoinGroup implements stack.GroupAddressableEndpoint. +func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + return e.joinGroupLocked(addr) +} + +// joinGroupLocked is like JoinGroup but with locking requirements. +// +// Precondition: e.mu must be locked. +func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { + if !header.IsV6MulticastAddress(addr) { + return &tcpip.ErrBadAddress{} + } + + e.mu.mld.joinGroup(addr) + return nil +} + +// LeaveGroup implements stack.GroupAddressableEndpoint. +func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + return e.leaveGroupLocked(addr) +} + +// leaveGroupLocked is like LeaveGroup but with locking requirements. +// +// Precondition: e.mu must be locked. +func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { + return e.mu.mld.leaveGroup(addr) +} + +// IsInGroup implements stack.GroupAddressableEndpoint. +func (e *endpoint) IsInGroup(addr tcpip.Address) bool { + e.mu.RLock() + defer e.mu.RUnlock() + return e.mu.mld.isInGroup(addr) +} + +// Stats implements stack.NetworkEndpoint. +func (e *endpoint) Stats() stack.NetworkEndpointStats { + return &e.stats.localStats +} + +var _ stack.NetworkProtocol = (*protocol)(nil) +var _ fragmentation.TimeoutHandler = (*protocol)(nil) + +type protocol struct { + stack *stack.Stack + options Options + + mu struct { + sync.RWMutex + + // eps is keyed by NICID to allow protocol methods to retrieve an endpoint + // when handling a packet, by looking at which NIC handled the packet. + eps map[tcpip.NICID]*endpoint + + // ICMP types for which the stack's global rate limiting must apply. + icmpRateLimitedTypes map[header.ICMPv6Type]struct{} + } + + ids []uint32 + hashIV uint32 + + // defaultTTL is the current default TTL for the protocol. Only the + // uint8 portion of it is meaningful. + // + // Must be accessed using atomic operations. + defaultTTL uint32 + + fragmentation *fragmentation.Fragmentation + icmpRateLimiter *stack.ICMPRateLimiter +} + +// Number returns the ipv6 protocol number. +func (p *protocol) Number() tcpip.NetworkProtocolNumber { + return ProtocolNumber +} + +// MinimumPacketSize returns the minimum valid ipv6 packet size. +func (p *protocol) MinimumPacketSize() int { + return header.IPv6MinimumSize +} + +// ParseAddresses implements stack.NetworkProtocol. +func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { + h := header.IPv6(v) + return h.SourceAddress(), h.DestinationAddress() +} + +// NewEndpoint creates a new ipv6 endpoint. +func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { + e := &endpoint{ + nic: nic, + dispatcher: dispatcher, + protocol: p, + } + + // NDP options must be 8 octet aligned and the first 2 bytes are used for + // the type and length fields leaving 6 octets as the minimum size for a + // nonce option without padding. + const nonceSize = 6 + + // As per RFC 7527 section 4.1, + // + // If any probe is looped back within RetransTimer milliseconds after + // having sent DupAddrDetectTransmits NS(DAD) messages, the interface + // continues with another MAX_MULTICAST_SOLICIT number of NS(DAD) + // messages transmitted RetransTimer milliseconds apart. + // + // Value taken from RFC 4861 section 10. + const maxMulticastSolicit = 3 + dadOptions := ip.DADOptions{ + Clock: p.stack.Clock(), + SecureRNG: p.stack.SecureRNG(), + NonceSize: nonceSize, + ExtendDADTransmits: maxMulticastSolicit, + Protocol: &e.mu.ndp, + NICID: nic.ID(), + } + + e.mu.Lock() + e.mu.addressableEndpointState.Init(e) + e.mu.ndp.init(e, dadOptions) + e.mu.mld.init(e) + e.dad.mu.Lock() + e.dad.mu.dad.Init(&e.dad.mu, p.options.DADConfigs, dadOptions) + e.dad.mu.Unlock() + e.mu.Unlock() + + stackStats := p.stack.Stats() + tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) + e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP) + e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V6) + + p.mu.Lock() + defer p.mu.Unlock() + p.mu.eps[nic.ID()] = e + return e +} + +func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint { + p.mu.RLock() + defer p.mu.RUnlock() + + for _, e := range p.mu.eps { + if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { + addressEndpoint.DecRef() + return e + } + } + + return nil +} + +func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) { + p.mu.RLock() + defer p.mu.RUnlock() + ep, ok := p.mu.eps[id] + return ep, ok +} + +func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { + p.mu.Lock() + defer p.mu.Unlock() + delete(p.mu.eps, nicID) +} + +// SetOption implements stack.NetworkProtocol. +func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { + switch v := option.(type) { + case *tcpip.DefaultTTLOption: + p.SetDefaultTTL(uint8(*v)) + return nil + default: + return &tcpip.ErrUnknownProtocolOption{} + } +} + +// Option implements stack.NetworkProtocol. +func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { + switch v := option.(type) { + case *tcpip.DefaultTTLOption: + *v = tcpip.DefaultTTLOption(p.DefaultTTL()) + return nil + default: + return &tcpip.ErrUnknownProtocolOption{} + } +} + +// SetDefaultTTL sets the default TTL for endpoints created with this protocol. +func (p *protocol) SetDefaultTTL(ttl uint8) { + atomic.StoreUint32(&p.defaultTTL, uint32(ttl)) +} + +// DefaultTTL returns the default TTL for endpoints created with this protocol. +func (p *protocol) DefaultTTL() uint8 { + return uint8(atomic.LoadUint32(&p.defaultTTL)) +} + +// Close implements stack.TransportProtocol. +func (*protocol) Close() {} + +// Wait implements stack.TransportProtocol. +func (*protocol) Wait() {} + +// parseAndValidate parses the packet (including its transport layer header) and +// returns the parsed IP header. +// +// Returns true if the IP header was successfully parsed. +func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (header.IPv6, bool) { + transProtoNum, hasTransportHdr, ok := p.Parse(pkt) + if !ok { + return nil, false + } + + h := header.IPv6(pkt.NetworkHeader().View()) + // Do not include the link header's size when calculating the size of the IP + // packet. + if !h.IsValid(pkt.Size() - pkt.LinkHeader().View().Size()) { + return nil, false + } + + if hasTransportHdr { + switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { + case stack.ParsedOK: + case stack.UnknownTransportProtocol, stack.TransportLayerParseError: + // The transport layer will handle unknown protocols and transport layer + // parsing errors. + default: + panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) + } + } + + return h, true +} + +// Parse implements stack.NetworkProtocol. +func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { + proto, _, fragOffset, fragMore, ok := parse.IPv6(pkt) + if !ok { + return 0, false, false + } + + return proto, !fragMore && fragOffset == 0, true +} + +// allowICMPReply reports whether an ICMP reply with provided type may +// be sent following the rate mask options and global ICMP rate limiter. +func (p *protocol) allowICMPReply(icmpType header.ICMPv6Type) bool { + p.mu.RLock() + defer p.mu.RUnlock() + + if _, ok := p.mu.icmpRateLimitedTypes[icmpType]; ok { + return p.stack.AllowICMPMessage() + } + return true +} + +// calculateNetworkMTU calculates the network-layer payload MTU based on the +// link-layer payload MTU and the length of every IPv6 header. +// Note that this is different than the Payload Length field of the IPv6 header, +// which includes the length of the extension headers. +func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) { + if linkMTU < header.IPv6MinimumMTU { + return 0, &tcpip.ErrInvalidEndpointState{} + } + + // As per RFC 7112 section 5, we should discard packets if their IPv6 header + // is bigger than 1280 bytes (ie, the minimum link MTU) since we do not + // support PMTU discovery: + // Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain + // length to 1280 bytes. Limiting the IPv6 Header Chain length to 1280 + // bytes ensures that the header chain length does not exceed the IPv6 + // minimum MTU. + if networkHeadersLen > header.IPv6MinimumMTU { + return 0, &tcpip.ErrMalformedHeader{} + } + + networkMTU := linkMTU - networkHeadersLen + if networkMTU > maxPayloadSize { + networkMTU = maxPayloadSize + } + return networkMTU, nil +} + +// Options holds options to configure a new protocol. +type Options struct { + // NDPConfigs is the default NDP configurations used by interfaces. + NDPConfigs NDPConfigurations + + // AutoGenLinkLocal determines whether or not the stack attempts to + // auto-generate a link-local address for newly enabled non-loopback + // NICs. + // + // Note, setting this to true does not mean that a link-local address is + // assigned right away, or at all. If Duplicate Address Detection is enabled, + // an address is only assigned if it successfully resolves. If it fails, no + // further attempts are made to auto-generate a link-local address. + // + // The generated link-local address follows RFC 4291 Appendix A guidelines. + AutoGenLinkLocal bool + + // NDPDisp is the NDP event dispatcher that an integrator can provide to + // receive NDP related events. + NDPDisp NDPDispatcher + + // OpaqueIIDOpts hold the options for generating opaque interface + // identifiers (IIDs) as outlined by RFC 7217. + OpaqueIIDOpts OpaqueInterfaceIdentifierOptions + + // TempIIDSeed is used to seed the initial temporary interface identifier + // history value used to generate IIDs for temporary SLAAC addresses. + // + // Temporary SLAAC addresses are short-lived addresses which are unpredictable + // and random from the perspective of other nodes on the network. It is + // recommended that the seed be a random byte buffer of at least + // header.IIDSize bytes to make sure that temporary SLAAC addresses are + // sufficiently random. It should follow minimum randomness requirements for + // security as outlined by RFC 4086. + // + // Note: using a nil value, the same seed across netstack program runs, or a + // seed that is too small would reduce randomness and increase predictability, + // defeating the purpose of temporary SLAAC addresses. + TempIIDSeed []byte + + // MLD holds options for MLD. + MLD MLDOptions + + // DADConfigs holds the default DAD configurations used by IPv6 endpoints. + DADConfigs stack.DADConfigurations + + // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. + // martian loopback packets) should be accepted. + AllowExternalLoopbackTraffic bool +} + +// NewProtocolWithOptions returns an IPv6 network protocol. +func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { + opts.NDPConfigs.validate() + + ids := hash.RandN32(buckets) + hashIV := hash.RandN32(1)[0] + + return func(s *stack.Stack) stack.NetworkProtocol { + p := &protocol{ + stack: s, + options: opts, + + ids: ids, + hashIV: hashIV, + } + p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) + p.mu.eps = make(map[tcpip.NICID]*endpoint) + p.SetDefaultTTL(DefaultTTL) + // Set default ICMP rate limiting to Linux defaults. + // + // Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big) + // See https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt. + defaultIcmpTypes := make(map[header.ICMPv6Type]struct{}) + for i := header.ICMPv6Type(0); i < header.ICMPv6EchoRequest; i++ { + switch i { + case header.ICMPv6PacketTooBig: + // Do not rate limit packet too big by default. + default: + defaultIcmpTypes[i] = struct{}{} + } + } + p.mu.icmpRateLimitedTypes = defaultIcmpTypes + + return p + } +} + +// NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. +func NewProtocol(s *stack.Stack) stack.NetworkProtocol { + return NewProtocolWithOptions(Options{})(s) +} + +func calculateFragmentReserve(pkt *stack.PacketBuffer) int { + return pkt.AvailableHeaderBytes() + pkt.NetworkHeader().View().Size() + header.IPv6FragmentHeaderSize +} + +// hashRoute calculates a hash value for the given route. It uses the source & +// destination address and 32-bit number to generate the hash. +func hashRoute(r *stack.Route, hashIV uint32) uint32 { + // The FNV-1a was chosen because it is a fast hashing algorithm, and + // cryptographic properties are not needed here. + h := fnv.New32a() + if _, err := h.Write([]byte(r.LocalAddress())); err != nil { + panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err)) + } + + if _, err := h.Write([]byte(r.RemoteAddress())); err != nil { + panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err)) + } + + s := make([]byte, 4) + binary.LittleEndian.PutUint32(s, hashIV) + if _, err := h.Write(s); err != nil { + panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected ever to return an error", err)) + } + + return h.Sum32() +} + +func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders header.IPv6, transportProto tcpip.TransportProtocolNumber, id uint32) (*stack.PacketBuffer, bool) { + fragPkt, offset, copied, more := pf.BuildNextFragment() + fragPkt.NetworkProtocolNumber = ProtocolNumber + + originalIPHeadersLength := len(originalIPHeaders) + + s := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{ + FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit), + M: more, + Identification: id, + }} + + fragmentIPHeadersLength := originalIPHeadersLength + s.Length() + fragmentIPHeaders := header.IPv6(fragPkt.NetworkHeader().Push(fragmentIPHeadersLength)) + + // Copy the IPv6 header and any extension headers already populated. + if copied := copy(fragmentIPHeaders, originalIPHeaders); copied != originalIPHeadersLength { + panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", copied, originalIPHeadersLength)) + } + + nextHeader, _ := s.Serialize(transportProto, fragmentIPHeaders[originalIPHeadersLength:]) + + fragmentIPHeaders.SetNextHeader(nextHeader) + fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize)) + + return fragPkt, more +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6_state_autogen.go new file mode 100644 index 000000000..13d427822 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ipv6_state_autogen.go @@ -0,0 +1,136 @@ +// automatically generated by stateify. + +package ipv6 + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *icmpv6DestinationUnreachableSockError) StateTypeName() string { + return "pkg/tcpip/network/ipv6.icmpv6DestinationUnreachableSockError" +} + +func (i *icmpv6DestinationUnreachableSockError) StateFields() []string { + return []string{} +} + +func (i *icmpv6DestinationUnreachableSockError) beforeSave() {} + +// +checklocksignore +func (i *icmpv6DestinationUnreachableSockError) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *icmpv6DestinationUnreachableSockError) afterLoad() {} + +// +checklocksignore +func (i *icmpv6DestinationUnreachableSockError) StateLoad(stateSourceObject state.Source) { +} + +func (i *icmpv6DestinationNetworkUnreachableSockError) StateTypeName() string { + return "pkg/tcpip/network/ipv6.icmpv6DestinationNetworkUnreachableSockError" +} + +func (i *icmpv6DestinationNetworkUnreachableSockError) StateFields() []string { + return []string{ + "icmpv6DestinationUnreachableSockError", + } +} + +func (i *icmpv6DestinationNetworkUnreachableSockError) beforeSave() {} + +// +checklocksignore +func (i *icmpv6DestinationNetworkUnreachableSockError) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (i *icmpv6DestinationNetworkUnreachableSockError) afterLoad() {} + +// +checklocksignore +func (i *icmpv6DestinationNetworkUnreachableSockError) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (i *icmpv6DestinationPortUnreachableSockError) StateTypeName() string { + return "pkg/tcpip/network/ipv6.icmpv6DestinationPortUnreachableSockError" +} + +func (i *icmpv6DestinationPortUnreachableSockError) StateFields() []string { + return []string{ + "icmpv6DestinationUnreachableSockError", + } +} + +func (i *icmpv6DestinationPortUnreachableSockError) beforeSave() {} + +// +checklocksignore +func (i *icmpv6DestinationPortUnreachableSockError) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (i *icmpv6DestinationPortUnreachableSockError) afterLoad() {} + +// +checklocksignore +func (i *icmpv6DestinationPortUnreachableSockError) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (i *icmpv6DestinationAddressUnreachableSockError) StateTypeName() string { + return "pkg/tcpip/network/ipv6.icmpv6DestinationAddressUnreachableSockError" +} + +func (i *icmpv6DestinationAddressUnreachableSockError) StateFields() []string { + return []string{ + "icmpv6DestinationUnreachableSockError", + } +} + +func (i *icmpv6DestinationAddressUnreachableSockError) beforeSave() {} + +// +checklocksignore +func (i *icmpv6DestinationAddressUnreachableSockError) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (i *icmpv6DestinationAddressUnreachableSockError) afterLoad() {} + +// +checklocksignore +func (i *icmpv6DestinationAddressUnreachableSockError) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.icmpv6DestinationUnreachableSockError) +} + +func (e *icmpv6PacketTooBigSockError) StateTypeName() string { + return "pkg/tcpip/network/ipv6.icmpv6PacketTooBigSockError" +} + +func (e *icmpv6PacketTooBigSockError) StateFields() []string { + return []string{ + "mtu", + } +} + +func (e *icmpv6PacketTooBigSockError) beforeSave() {} + +// +checklocksignore +func (e *icmpv6PacketTooBigSockError) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.mtu) +} + +func (e *icmpv6PacketTooBigSockError) afterLoad() {} + +// +checklocksignore +func (e *icmpv6PacketTooBigSockError) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.mtu) +} + +func init() { + state.Register((*icmpv6DestinationUnreachableSockError)(nil)) + state.Register((*icmpv6DestinationNetworkUnreachableSockError)(nil)) + state.Register((*icmpv6DestinationPortUnreachableSockError)(nil)) + state.Register((*icmpv6DestinationAddressUnreachableSockError)(nil)) + state.Register((*icmpv6PacketTooBigSockError)(nil)) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/mld.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/mld.go new file mode 100644 index 000000000..bc1af193c --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/mld.go @@ -0,0 +1,286 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6 + +import ( + "fmt" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/internal/ip" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +const ( + // UnsolicitedReportIntervalMax is the maximum delay between sending + // unsolicited MLD reports. + // + // Obtained from RFC 2710 Section 7.10. + UnsolicitedReportIntervalMax = 10 * time.Second +) + +// MLDOptions holds options for MLD. +type MLDOptions struct { + // Enabled indicates whether MLD will be performed. + // + // When enabled, MLD may transmit MLD report and done messages when + // joining and leaving multicast groups respectively, and handle incoming + // MLD packets. + // + // This field is ignored and is always assumed to be false for interfaces + // without neighbouring nodes (e.g. loopback). + Enabled bool +} + +var _ ip.MulticastGroupProtocol = (*mldState)(nil) + +// mldState is the per-interface MLD state. +// +// mldState.init MUST be called to initialize the MLD state. +type mldState struct { + // The IPv6 endpoint this mldState is for. + ep *endpoint + + genericMulticastProtocol ip.GenericMulticastProtocolState +} + +// Enabled implements ip.MulticastGroupProtocol. +func (mld *mldState) Enabled() bool { + // No need to perform MLD on loopback interfaces since they don't have + // neighbouring nodes. + return mld.ep.protocol.options.MLD.Enabled && !mld.ep.nic.IsLoopback() && mld.ep.Enabled() +} + +// SendReport implements ip.MulticastGroupProtocol. +// +// Precondition: mld.ep.mu must be read locked. +func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) { + return mld.writePacket(groupAddress, groupAddress, header.ICMPv6MulticastListenerReport) +} + +// SendLeave implements ip.MulticastGroupProtocol. +// +// Precondition: mld.ep.mu must be read locked. +func (mld *mldState) SendLeave(groupAddress tcpip.Address) tcpip.Error { + _, err := mld.writePacket(header.IPv6AllRoutersLinkLocalMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone) + return err +} + +// ShouldPerformProtocol implements ip.MulticastGroupProtocol. +func (mld *mldState) ShouldPerformProtocol(groupAddress tcpip.Address) bool { + // As per RFC 2710 section 5 page 10, + // + // The link-scope all-nodes address (FF02::1) is handled as a special + // case. The node starts in Idle Listener state for that address on + // every interface, never transitions to another state, and never sends + // a Report or Done for that address. + // + // MLD messages are never sent for multicast addresses whose scope is 0 + // (reserved) or 1 (node-local). + if groupAddress == header.IPv6AllNodesMulticastAddress { + return false + } + + scope := header.V6MulticastScope(groupAddress) + return scope != header.IPv6Reserved0MulticastScope && scope != header.IPv6InterfaceLocalMulticastScope +} + +// init sets up an mldState struct, and is required to be called before using +// a new mldState. +// +// Must only be called once for the lifetime of mld. +func (mld *mldState) init(ep *endpoint) { + mld.ep = ep + mld.genericMulticastProtocol.Init(&ep.mu.RWMutex, ip.GenericMulticastProtocolOptions{ + Rand: ep.protocol.stack.Rand(), + Clock: ep.protocol.stack.Clock(), + Protocol: mld, + MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax, + }) +} + +// handleMulticastListenerQuery handles a query message. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) handleMulticastListenerQuery(mldHdr header.MLD) { + mld.genericMulticastProtocol.HandleQueryLocked(mldHdr.MulticastAddress(), mldHdr.MaximumResponseDelay()) +} + +// handleMulticastListenerReport handles a report message. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) handleMulticastListenerReport(mldHdr header.MLD) { + mld.genericMulticastProtocol.HandleReportLocked(mldHdr.MulticastAddress()) +} + +// joinGroup handles joining a new group and sending and scheduling the required +// messages. +// +// If the group is already joined, returns *tcpip.ErrDuplicateAddress. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) joinGroup(groupAddress tcpip.Address) { + mld.genericMulticastProtocol.JoinGroupLocked(groupAddress) +} + +// isInGroup returns true if the specified group has been joined locally. +// +// Precondition: mld.ep.mu must be read locked. +func (mld *mldState) isInGroup(groupAddress tcpip.Address) bool { + return mld.genericMulticastProtocol.IsLocallyJoinedRLocked(groupAddress) +} + +// leaveGroup handles removing the group from the membership map, cancels any +// delay timers associated with that group, and sends the Done message, if +// required. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) leaveGroup(groupAddress tcpip.Address) tcpip.Error { + // LeaveGroup returns false only if the group was not joined. + if mld.genericMulticastProtocol.LeaveGroupLocked(groupAddress) { + return nil + } + + return &tcpip.ErrBadLocalAddress{} +} + +// softLeaveAll leaves all groups from the perspective of MLD, but remains +// joined locally. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) softLeaveAll() { + mld.genericMulticastProtocol.MakeAllNonMemberLocked() +} + +// initializeAll attemps to initialize the MLD state for each group that has +// been joined locally. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) initializeAll() { + mld.genericMulticastProtocol.InitializeGroupsLocked() +} + +// sendQueuedReports attempts to send any reports that are queued for sending. +// +// Precondition: mld.ep.mu must be locked. +func (mld *mldState) sendQueuedReports() { + mld.genericMulticastProtocol.SendQueuedReportsLocked() +} + +// writePacket assembles and sends an MLD packet. +// +// Precondition: mld.ep.mu must be read locked. +func (mld *mldState) writePacket(destAddress, groupAddress tcpip.Address, mldType header.ICMPv6Type) (bool, tcpip.Error) { + sentStats := mld.ep.stats.icmp.packetsSent + var mldStat tcpip.MultiCounterStat + switch mldType { + case header.ICMPv6MulticastListenerReport: + mldStat = sentStats.multicastListenerReport + case header.ICMPv6MulticastListenerDone: + mldStat = sentStats.multicastListenerDone + default: + panic(fmt.Sprintf("unrecognized mld type = %d", mldType)) + } + + icmp := header.ICMPv6(buffer.NewView(header.ICMPv6HeaderSize + header.MLDMinimumSize)) + icmp.SetType(mldType) + header.MLD(icmp.MessageBody()).SetMulticastAddress(groupAddress) + // As per RFC 2710 section 3, + // + // All MLD messages described in this document are sent with a link-local + // IPv6 Source Address, an IPv6 Hop Limit of 1, and an IPv6 Router Alert + // option in a Hop-by-Hop Options header. + // + // However, this would cause problems with Duplicate Address Detection with + // the first address as MLD snooping switches may not send multicast traffic + // that DAD depends on to the node performing DAD without the MLD report, as + // documented in RFC 4816: + // + // Note that when a node joins a multicast address, it typically sends a + // Multicast Listener Discovery (MLD) report message [RFC2710] [RFC3810] + // for the multicast address. In the case of Duplicate Address + // Detection, the MLD report message is required in order to inform MLD- + // snooping switches, rather than routers, to forward multicast packets. + // In the above description, the delay for joining the multicast address + // thus means delaying transmission of the corresponding MLD report + // message. Since the MLD specifications do not request a random delay + // to avoid race conditions, just delaying Neighbor Solicitation would + // cause congestion by the MLD report messages. The congestion would + // then prevent the MLD-snooping switches from working correctly and, as + // a result, prevent Duplicate Address Detection from working. The + // requirement to include the delay for the MLD report in this case + // avoids this scenario. [RFC3590] also talks about some interaction + // issues between Duplicate Address Detection and MLD, and specifies + // which source address should be used for the MLD report in this case. + // + // As per RFC 3590 section 4, we should still send out MLD reports with an + // unspecified source address if we do not have an assigned link-local + // address to use as the source address to ensure DAD works as expected on + // networks with MLD snooping switches: + // + // MLD Report and Done messages are sent with a link-local address as + // the IPv6 source address, if a valid address is available on the + // interface. If a valid link-local address is not available (e.g., one + // has not been configured), the message is sent with the unspecified + // address (::) as the IPv6 source address. + // + // Once a valid link-local address is available, a node SHOULD generate + // new MLD Report messages for all multicast addresses joined on the + // interface. + // + // Routers receiving an MLD Report or Done message with the unspecified + // address as the IPv6 source address MUST silently discard the packet + // without taking any action on the packets contents. + // + // Snooping switches MUST manage multicast forwarding state based on MLD + // Report and Done messages sent with the unspecified address as the + // IPv6 source address. + localAddress := mld.ep.getLinkLocalAddressRLocked() + if len(localAddress) == 0 { + localAddress = header.IPv6Any + } + + icmp.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: icmp, + Src: localAddress, + Dst: destAddress, + })) + + extensionHeaders := header.IPv6ExtHdrSerializer{ + header.IPv6SerializableHopByHopExtHdr{ + &header.IPv6RouterAlertOption{Value: header.IPv6RouterAlertMLD}, + }, + } + + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(mld.ep.MaxHeaderLength()) + extensionHeaders.Length(), + Data: buffer.View(icmp).ToVectorisedView(), + }) + + if err := addIPHeader(localAddress, destAddress, pkt, stack.NetworkHeaderParams{ + Protocol: header.ICMPv6ProtocolNumber, + TTL: header.MLDHopLimit, + }, extensionHeaders); err != nil { + panic(fmt.Sprintf("failed to add IP header: %s", err)) + } + if err := mld.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(destAddress), ProtocolNumber, pkt); err != nil { + sentStats.dropped.Increment() + return false, err + } + mldStat.Increment() + return localAddress != header.IPv6Any, nil +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ndp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ndp.go new file mode 100644 index 000000000..938427420 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/ndp.go @@ -0,0 +1,1943 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6 + +import ( + "fmt" + "time" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/internal/ip" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +const ( + // defaultMaxRtrSolicitations is the default number of Router + // Solicitation messages to send when an IPv6 endpoint becomes enabled. + // + // Default = 3 (from RFC 4861 section 10). + defaultMaxRtrSolicitations = 3 + + // defaultRtrSolicitationInterval is the default amount of time between + // sending Router Solicitation messages. + // + // Default = 4s (from 4861 section 10). + defaultRtrSolicitationInterval = 4 * time.Second + + // defaultMaxRtrSolicitationDelay is the default maximum amount of time + // to wait before sending the first Router Solicitation message. + // + // Default = 1s (from 4861 section 10). + defaultMaxRtrSolicitationDelay = time.Second + + // defaultHandleRAs is the default configuration for whether or not to + // handle incoming Router Advertisements as a host. + defaultHandleRAs = HandlingRAsEnabledWhenForwardingDisabled + + // defaultDiscoverDefaultRouters is the default configuration for + // whether or not to discover default routers from incoming Router + // Advertisements, as a host. + defaultDiscoverDefaultRouters = true + + // defaultDiscoverMoreSpecificRoutes is the default configuration for + // whether or not to discover more-specific routes from incoming Router + // Advertisements, as a host. + defaultDiscoverMoreSpecificRoutes = true + + // defaultDiscoverOnLinkPrefixes is the default configuration for + // whether or not to discover on-link prefixes from incoming Router + // Advertisements' Prefix Information option, as a host. + defaultDiscoverOnLinkPrefixes = true + + // defaultAutoGenGlobalAddresses is the default configuration for + // whether or not to generate global IPv6 addresses in response to + // receiving a new Prefix Information option with its Autonomous + // Address AutoConfiguration flag set, as a host. + // + // Default = true. + defaultAutoGenGlobalAddresses = true + + // minimumRtrSolicitationInterval is the minimum amount of time to wait + // between sending Router Solicitation messages. This limit is imposed + // to make sure that Router Solicitation messages are not sent all at + // once, defeating the purpose of sending the initial few messages. + minimumRtrSolicitationInterval = 500 * time.Millisecond + + // minimumMaxRtrSolicitationDelay is the minimum amount of time to wait + // before sending the first Router Solicitation message. It is 0 because + // we cannot have a negative delay. + minimumMaxRtrSolicitationDelay = 0 + + // MaxDiscoveredOffLinkRoutes is the maximum number of discovered off-link + // routes. The stack should stop discovering new off-link routes after + // this limit is reached. + // + // This value MUST be at minimum 2 as per RFC 4861 section 6.3.4, and + // SHOULD be more. + MaxDiscoveredOffLinkRoutes = 10 + + // MaxDiscoveredOnLinkPrefixes is the maximum number of discovered + // on-link prefixes. The stack should stop discovering new on-link + // prefixes after discovering MaxDiscoveredOnLinkPrefixes on-link + // prefixes. + MaxDiscoveredOnLinkPrefixes = 10 + + // validPrefixLenForAutoGen is the expected prefix length that an + // address can be generated for. Must be 64 bits as the interface + // identifier (IID) is 64 bits and an IPv6 address is 128 bits, so + // 128 - 64 = 64. + validPrefixLenForAutoGen = 64 + + // defaultAutoGenTempGlobalAddresses is the default configuration for whether + // or not to generate temporary SLAAC addresses. + defaultAutoGenTempGlobalAddresses = true + + // defaultMaxTempAddrValidLifetime is the default maximum valid lifetime + // for temporary SLAAC addresses generated as part of RFC 4941. + // + // Default = 7 days (from RFC 4941 section 5). + defaultMaxTempAddrValidLifetime = 7 * 24 * time.Hour + + // defaultMaxTempAddrPreferredLifetime is the default preferred lifetime + // for temporary SLAAC addresses generated as part of RFC 4941. + // + // Default = 1 day (from RFC 4941 section 5). + defaultMaxTempAddrPreferredLifetime = 24 * time.Hour + + // defaultRegenAdvanceDuration is the default duration before the deprecation + // of a temporary address when a new address will be generated. + // + // Default = 5s (from RFC 4941 section 5). + defaultRegenAdvanceDuration = 5 * time.Second + + // minRegenAdvanceDuration is the minimum duration before the deprecation + // of a temporary address when a new address will be generated. + minRegenAdvanceDuration = time.Duration(0) + + // maxSLAACAddrLocalRegenAttempts is the maximum number of times to attempt + // SLAAC address regenerations in response to an IPv6 endpoint-local conflict. + maxSLAACAddrLocalRegenAttempts = 10 + + // MinPrefixInformationValidLifetimeForUpdate is the minimum Valid + // Lifetime to update the valid lifetime of a generated address by + // SLAAC. + // + // Min = 2hrs. + MinPrefixInformationValidLifetimeForUpdate = 2 * time.Hour + + // MaxDesyncFactor is the upper bound for the preferred lifetime's desync + // factor for temporary SLAAC addresses. + // + // Must be greater than 0. + // + // Max = 10m (from RFC 4941 section 5). + MaxDesyncFactor = 10 * time.Minute + + // MinMaxTempAddrPreferredLifetime is the minimum value allowed for the + // maximum preferred lifetime for temporary SLAAC addresses. + // + // This value guarantees that a temporary address is preferred for at + // least 1hr if the SLAAC prefix is valid for at least that time. + MinMaxTempAddrPreferredLifetime = defaultRegenAdvanceDuration + MaxDesyncFactor + time.Hour + + // MinMaxTempAddrValidLifetime is the minimum value allowed for the + // maximum valid lifetime for temporary SLAAC addresses. + // + // This value guarantees that a temporary address is valid for at least + // 2hrs if the SLAAC prefix is valid for at least that time. + MinMaxTempAddrValidLifetime = 2 * time.Hour +) + +// NDPEndpoint is an endpoint that supports NDP. +type NDPEndpoint interface { + // SetNDPConfigurations sets the NDP configurations. + SetNDPConfigurations(NDPConfigurations) +} + +// DHCPv6ConfigurationFromNDPRA is a configuration available via DHCPv6 that an +// NDP Router Advertisement informed the Stack about. +type DHCPv6ConfigurationFromNDPRA int + +const ( + _ DHCPv6ConfigurationFromNDPRA = iota + + // DHCPv6NoConfiguration indicates that no configurations are available via + // DHCPv6. + DHCPv6NoConfiguration + + // DHCPv6ManagedAddress indicates that addresses are available via DHCPv6. + // + // DHCPv6ManagedAddress also implies DHCPv6OtherConfigurations because DHCPv6 + // returns all available configuration information when serving addresses. + DHCPv6ManagedAddress + + // DHCPv6OtherConfigurations indicates that other configuration information is + // available via DHCPv6. + // + // Other configurations are configurations other than addresses. Examples of + // other configurations are recursive DNS server list, DNS search lists and + // default gateway. + DHCPv6OtherConfigurations +) + +// NDPDispatcher is the interface integrators of netstack must implement to +// receive and handle NDP related events. +type NDPDispatcher interface { + // OnDuplicateAddressDetectionResult is called when the DAD process for an + // address on a NIC completes. + // + // This function is not permitted to block indefinitely. This function + // is also not permitted to call into the stack. + OnDuplicateAddressDetectionResult(tcpip.NICID, tcpip.Address, stack.DADResult) + + // OnOffLinkRouteUpdated is called when an off-link route is updated. + // + // This function is not permitted to block indefinitely. This function + // is also not permitted to call into the stack. + OnOffLinkRouteUpdated(tcpip.NICID, tcpip.Subnet, tcpip.Address, header.NDPRoutePreference) + + // OnOffLinkRouteInvalidated is called when an off-link route is invalidated. + // + // This function is not permitted to block indefinitely. This function + // is also not permitted to call into the stack. + OnOffLinkRouteInvalidated(tcpip.NICID, tcpip.Subnet, tcpip.Address) + + // OnOnLinkPrefixDiscovered is called when a new on-link prefix is discovered. + // + // This function is not permitted to block indefinitely. This function + // is also not permitted to call into the stack. + OnOnLinkPrefixDiscovered(tcpip.NICID, tcpip.Subnet) + + // OnOnLinkPrefixInvalidated is called when a discovered on-link prefix that + // was remembered is invalidated. + // + // This function is not permitted to block indefinitely. This function + // is also not permitted to call into the stack. + OnOnLinkPrefixInvalidated(tcpip.NICID, tcpip.Subnet) + + // OnAutoGenAddress is called when a new prefix with its autonomous address- + // configuration flag set is received and SLAAC was performed. + // + // This function is not permitted to block indefinitely. It must not + // call functions on the stack itself. + OnAutoGenAddress(tcpip.NICID, tcpip.AddressWithPrefix) + + // OnAutoGenAddressDeprecated is called when an auto-generated address (SLAAC) + // is deprecated, but is still considered valid. Note, if an address is + // invalidated at the same ime it is deprecated, the deprecation event may not + // be received. + // + // This function is not permitted to block indefinitely. It must not + // call functions on the stack itself. + OnAutoGenAddressDeprecated(tcpip.NICID, tcpip.AddressWithPrefix) + + // OnAutoGenAddressInvalidated is called when an auto-generated address + // (SLAAC) is invalidated. + // + // This function is not permitted to block indefinitely. It must not + // call functions on the stack itself. + OnAutoGenAddressInvalidated(tcpip.NICID, tcpip.AddressWithPrefix) + + // OnRecursiveDNSServerOption is called when the stack learns of DNS servers + // through NDP. Note, the addresses may contain link-local addresses. + // + // It is up to the caller to use the DNS Servers only for their valid + // lifetime. OnRecursiveDNSServerOption may be called for new or + // already known DNS servers. If called with known DNS servers, their + // valid lifetimes must be refreshed to the lifetime (it may be increased, + // decreased, or completely invalidated when the lifetime = 0). + // + // This function is not permitted to block indefinitely. It must not + // call functions on the stack itself. + OnRecursiveDNSServerOption(tcpip.NICID, []tcpip.Address, time.Duration) + + // OnDNSSearchListOption is called when the stack learns of DNS search lists + // through NDP. + // + // It is up to the caller to use the domain names in the search list + // for only their valid lifetime. OnDNSSearchListOption may be called + // with new or already known domain names. If called with known domain + // names, their valid lifetimes must be refreshed to the lifetime (it may + // be increased, decreased or completely invalidated when the lifetime = 0. + OnDNSSearchListOption(tcpip.NICID, []string, time.Duration) + + // OnDHCPv6Configuration is called with an updated configuration that is + // available via DHCPv6 for the passed NIC. + // + // This function is not permitted to block indefinitely. It must not + // call functions on the stack itself. + OnDHCPv6Configuration(tcpip.NICID, DHCPv6ConfigurationFromNDPRA) +} + +var _ fmt.Stringer = HandleRAsConfiguration(0) + +// HandleRAsConfiguration enumerates when RAs may be handled. +type HandleRAsConfiguration int + +const ( + // HandlingRAsDisabled indicates that Router Advertisements will not be + // handled. + HandlingRAsDisabled HandleRAsConfiguration = iota + + // HandlingRAsEnabledWhenForwardingDisabled indicates that router + // advertisements will only be handled when forwarding is disabled. + HandlingRAsEnabledWhenForwardingDisabled + + // HandlingRAsAlwaysEnabled indicates that Router Advertisements will always + // be handled, even when forwarding is enabled. + HandlingRAsAlwaysEnabled +) + +// String implements fmt.Stringer. +func (c HandleRAsConfiguration) String() string { + switch c { + case HandlingRAsDisabled: + return "HandlingRAsDisabled" + case HandlingRAsEnabledWhenForwardingDisabled: + return "HandlingRAsEnabledWhenForwardingDisabled" + case HandlingRAsAlwaysEnabled: + return "HandlingRAsAlwaysEnabled" + default: + return fmt.Sprintf("HandleRAsConfiguration(%d)", c) + } +} + +// enabled returns true iff Router Advertisements may be handled given the +// specified forwarding status. +func (c HandleRAsConfiguration) enabled(forwarding bool) bool { + switch c { + case HandlingRAsDisabled: + return false + case HandlingRAsEnabledWhenForwardingDisabled: + return !forwarding + case HandlingRAsAlwaysEnabled: + return true + default: + panic(fmt.Sprintf("unhandled HandleRAsConfiguration = %d", c)) + } +} + +// NDPConfigurations is the NDP configurations for the netstack. +type NDPConfigurations struct { + // The number of Router Solicitation messages to send when the IPv6 endpoint + // becomes enabled. + // + // Ignored unless configured to handle Router Advertisements. + MaxRtrSolicitations uint8 + + // The amount of time between transmitting Router Solicitation messages. + // + // Must be greater than or equal to 0.5s. + RtrSolicitationInterval time.Duration + + // The maximum amount of time before transmitting the first Router + // Solicitation message. + // + // Must be greater than or equal to 0s. + MaxRtrSolicitationDelay time.Duration + + // HandleRAs is the configuration for when Router Advertisements should be + // handled. + HandleRAs HandleRAsConfiguration + + // DiscoverDefaultRouters determines whether or not default routers are + // discovered from Router Advertisements, as per RFC 4861 section 6. This + // configuration is ignored if RAs will not be processed (see HandleRAs). + DiscoverDefaultRouters bool + + // DiscoverMoreSpecificRoutes determines whether or not more specific routes + // are discovered from Router Advertisements, as per RFC 4191. This + // configuration is ignored if RAs will not be processed (see HandleRAs). + DiscoverMoreSpecificRoutes bool + + // DiscoverOnLinkPrefixes determines whether or not on-link prefixes are + // discovered from Router Advertisements' Prefix Information option, as per + // RFC 4861 section 6. This configuration is ignored if RAs will not be + // processed (see HandleRAs). + DiscoverOnLinkPrefixes bool + + // AutoGenGlobalAddresses determines whether or not an IPv6 endpoint performs + // SLAAC to auto-generate global SLAAC addresses in response to Prefix + // Information options, as per RFC 4862. + // + // Note, if an address was already generated for some unique prefix, as + // part of SLAAC, this option does not affect whether or not the + // lifetime(s) of the generated address changes; this option only + // affects the generation of new addresses as part of SLAAC. + AutoGenGlobalAddresses bool + + // AutoGenAddressConflictRetries determines how many times to attempt to retry + // generation of a permanent auto-generated address in response to DAD + // conflicts. + // + // If the method used to generate the address does not support creating + // alternative addresses (e.g. IIDs based on the modified EUI64 of a NIC's + // MAC address), then no attempt is made to resolve the conflict. + AutoGenAddressConflictRetries uint8 + + // AutoGenTempGlobalAddresses determines whether or not temporary SLAAC + // addresses are generated for an IPv6 endpoint as part of SLAAC privacy + // extensions, as per RFC 4941. + // + // Ignored if AutoGenGlobalAddresses is false. + AutoGenTempGlobalAddresses bool + + // MaxTempAddrValidLifetime is the maximum valid lifetime for temporary + // SLAAC addresses. + MaxTempAddrValidLifetime time.Duration + + // MaxTempAddrPreferredLifetime is the maximum preferred lifetime for + // temporary SLAAC addresses. + MaxTempAddrPreferredLifetime time.Duration + + // RegenAdvanceDuration is the duration before the deprecation of a temporary + // address when a new address will be generated. + RegenAdvanceDuration time.Duration +} + +// DefaultNDPConfigurations returns an NDPConfigurations populated with +// default values. +func DefaultNDPConfigurations() NDPConfigurations { + return NDPConfigurations{ + MaxRtrSolicitations: defaultMaxRtrSolicitations, + RtrSolicitationInterval: defaultRtrSolicitationInterval, + MaxRtrSolicitationDelay: defaultMaxRtrSolicitationDelay, + HandleRAs: defaultHandleRAs, + DiscoverDefaultRouters: defaultDiscoverDefaultRouters, + DiscoverMoreSpecificRoutes: defaultDiscoverMoreSpecificRoutes, + DiscoverOnLinkPrefixes: defaultDiscoverOnLinkPrefixes, + AutoGenGlobalAddresses: defaultAutoGenGlobalAddresses, + AutoGenTempGlobalAddresses: defaultAutoGenTempGlobalAddresses, + MaxTempAddrValidLifetime: defaultMaxTempAddrValidLifetime, + MaxTempAddrPreferredLifetime: defaultMaxTempAddrPreferredLifetime, + RegenAdvanceDuration: defaultRegenAdvanceDuration, + } +} + +// validate modifies an NDPConfigurations with valid values. If invalid values +// are present in c, the corresponding default values are used instead. +func (c *NDPConfigurations) validate() { + if c.RtrSolicitationInterval < minimumRtrSolicitationInterval { + c.RtrSolicitationInterval = defaultRtrSolicitationInterval + } + + if c.MaxRtrSolicitationDelay < minimumMaxRtrSolicitationDelay { + c.MaxRtrSolicitationDelay = defaultMaxRtrSolicitationDelay + } + + if c.MaxTempAddrValidLifetime < MinMaxTempAddrValidLifetime { + c.MaxTempAddrValidLifetime = MinMaxTempAddrValidLifetime + } + + if c.MaxTempAddrPreferredLifetime < MinMaxTempAddrPreferredLifetime || c.MaxTempAddrPreferredLifetime > c.MaxTempAddrValidLifetime { + c.MaxTempAddrPreferredLifetime = MinMaxTempAddrPreferredLifetime + } + + if c.RegenAdvanceDuration < minRegenAdvanceDuration { + c.RegenAdvanceDuration = minRegenAdvanceDuration + } +} + +type timer struct { + // done indicates to the timer that the timer was stopped. + done *bool + + timer tcpip.Timer +} + +type offLinkRoute struct { + dest tcpip.Subnet + router tcpip.Address +} + +// ndpState is the per-Interface NDP state. +type ndpState struct { + // Do not allow overwriting this state. + _ sync.NoCopy + + // The IPv6 endpoint this ndpState is for. + ep *endpoint + + // configs is the per-interface NDP configurations. + configs NDPConfigurations + + // The DAD timers to send the next NS message, or resolve the address. + dad ip.DAD + + // The off-link routes discovered through Router Advertisements. + offLinkRoutes map[offLinkRoute]offLinkRouteState + + // rtrSolicitTimer is the timer used to send the next router solicitation + // message. + // + // rtrSolicitTimer is the zero value when NDP is not soliciting routers. + rtrSolicitTimer timer + + // The on-link prefixes discovered through Router Advertisements' Prefix + // Information option. + onLinkPrefixes map[tcpip.Subnet]onLinkPrefixState + + // The SLAAC prefixes discovered through Router Advertisements' Prefix + // Information option. + slaacPrefixes map[tcpip.Subnet]slaacPrefixState + + // The last learned DHCPv6 configuration from an NDP RA. + dhcpv6Configuration DHCPv6ConfigurationFromNDPRA + + // temporaryIIDHistory is the history value used to generate a new temporary + // IID. + temporaryIIDHistory [header.IIDSize]byte + + // temporaryAddressDesyncFactor is the preferred lifetime's desync factor for + // temporary SLAAC addresses. + temporaryAddressDesyncFactor time.Duration +} + +// offLinkRouteState holds data associated with an off-link route discovered by +// a Router Advertisement (RA). +type offLinkRouteState struct { + prf header.NDPRoutePreference + + // Job to invalidate the route. + // + // Must not be nil. + invalidationJob *tcpip.Job +} + +// onLinkPrefixState holds data associated with an on-link prefix discovered by +// a Router Advertisement's Prefix Information option (PI) when the NDP +// configurations was configured to do so. +type onLinkPrefixState struct { + // Job to invalidate the on-link prefix. + // + // Must not be nil. + invalidationJob *tcpip.Job +} + +// tempSLAACAddrState holds state associated with a temporary SLAAC address. +type tempSLAACAddrState struct { + // Job to deprecate the temporary SLAAC address. + // + // Must not be nil. + deprecationJob *tcpip.Job + + // Job to invalidate the temporary SLAAC address. + // + // Must not be nil. + invalidationJob *tcpip.Job + + // Job to regenerate the temporary SLAAC address. + // + // Must not be nil. + regenJob *tcpip.Job + + createdAt tcpip.MonotonicTime + + // The address's endpoint. + // + // Must not be nil. + addressEndpoint stack.AddressEndpoint + + // Has a new temporary SLAAC address already been regenerated? + regenerated bool +} + +// slaacPrefixState holds state associated with a SLAAC prefix. +type slaacPrefixState struct { + // Job to deprecate the prefix. + // + // Must not be nil. + deprecationJob *tcpip.Job + + // Job to invalidate the prefix. + // + // Must not be nil. + invalidationJob *tcpip.Job + + // nil iff the address is valid forever. + validUntil *tcpip.MonotonicTime + + // nil iff the address is preferred forever. + preferredUntil *tcpip.MonotonicTime + + // State associated with the stable address generated for the prefix. + stableAddr struct { + // The address's endpoint. + // + // May only be nil when the address is being (re-)generated. Otherwise, + // must not be nil as all SLAAC prefixes must have a stable address. + addressEndpoint stack.AddressEndpoint + + // The number of times an address has been generated locally where the IPv6 + // endpoint already had the generated address. + localGenerationFailures uint8 + } + + // The temporary (short-lived) addresses generated for the SLAAC prefix. + tempAddrs map[tcpip.Address]tempSLAACAddrState + + // The next two fields are used by both stable and temporary addresses + // generated for a SLAAC prefix. This is safe as only 1 address is in the + // generation and DAD process at any time. That is, no two addresses are + // generated at the same time for a given SLAAC prefix. + + // The number of times an address has been generated and added to the IPv6 + // endpoint. + // + // Addresses may be regenerated in reseponse to a DAD conflicts. + generationAttempts uint8 + + // The maximum number of times to attempt regeneration of a SLAAC address + // in response to DAD conflicts. + maxGenerationAttempts uint8 +} + +// startDuplicateAddressDetection performs Duplicate Address Detection. +// +// This function must only be called by IPv6 addresses that are currently +// tentative. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) tcpip.Error { + // addr must be a valid unicast IPv6 address. + if !header.IsV6UnicastAddress(addr) { + return &tcpip.ErrAddressFamilyNotSupported{} + } + + if addressEndpoint.GetKind() != stack.PermanentTentative { + // The endpoint should be marked as tentative since we are starting DAD. + panic(fmt.Sprintf("ndpdad: addr %s is not tentative on NIC(%d)", addr, ndp.ep.nic.ID())) + } + + ret := ndp.dad.CheckDuplicateAddressLocked(addr, func(r stack.DADResult) { + if addressEndpoint.GetKind() != stack.PermanentTentative { + // The endpoint should still be marked as tentative since we are still + // performing DAD on it. + panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID())) + } + + var dadSucceeded bool + switch r.(type) { + case *stack.DADAborted, *stack.DADError, *stack.DADDupAddrDetected: + dadSucceeded = false + case *stack.DADSucceeded: + dadSucceeded = true + default: + panic(fmt.Sprintf("unrecognized DAD result = %T", r)) + } + + if dadSucceeded { + addressEndpoint.SetKind(stack.Permanent) + } + + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnDuplicateAddressDetectionResult(ndp.ep.nic.ID(), addr, r) + } + + if dadSucceeded { + if addressEndpoint.ConfigType() == stack.AddressConfigSlaac { + // Reset the generation attempts counter as we are starting the + // generation of a new address for the SLAAC prefix. + ndp.regenerateTempSLAACAddr(addressEndpoint.AddressWithPrefix().Subnet(), true /* resetGenAttempts */) + } + + ndp.ep.onAddressAssignedLocked(addr) + } + }) + + switch ret { + case stack.DADStarting: + case stack.DADAlreadyRunning: + panic(fmt.Sprintf("ndpdad: already performing DAD for addr %s on NIC(%d)", addr, ndp.ep.nic.ID())) + case stack.DADDisabled: + addressEndpoint.SetKind(stack.Permanent) + + // Consider DAD to have resolved even if no DAD messages were actually + // transmitted. + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnDuplicateAddressDetectionResult(ndp.ep.nic.ID(), addr, &stack.DADSucceeded{}) + } + + ndp.ep.onAddressAssignedLocked(addr) + } + + return nil +} + +// stopDuplicateAddressDetection ends a running Duplicate Address Detection +// process. Note, this may leave the DAD process for a tentative address in +// such a state forever, unless some other external event resolves the DAD +// process (receiving an NA from the true owner of addr, or an NS for addr +// (implying another node is attempting to use addr)). It is up to the caller +// of this function to handle such a scenario. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) stopDuplicateAddressDetection(addr tcpip.Address, reason stack.DADResult) { + ndp.dad.StopLocked(addr, reason) +} + +// handleRA handles a Router Advertisement message that arrived on the NIC +// this ndp is for. Does nothing if the NIC is configured to not handle RAs. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) { + // Is the IPv6 endpoint configured to handle RAs at all? + // + // Currently, the stack does not determine router interface status on a + // per-interface basis; it is a protocol-wide configuration, so we check the + // protocol's forwarding flag to determine if the IPv6 endpoint is forwarding + // packets. + if !ndp.configs.HandleRAs.enabled(ndp.ep.Forwarding()) { + ndp.ep.stats.localStats.UnhandledRouterAdvertisements.Increment() + return + } + + // Only worry about the DHCPv6 configuration if we have an NDPDispatcher as we + // only inform the dispatcher on configuration changes. We do nothing else + // with the information. + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + var configuration DHCPv6ConfigurationFromNDPRA + switch { + case ra.ManagedAddrConfFlag(): + configuration = DHCPv6ManagedAddress + + case ra.OtherConfFlag(): + configuration = DHCPv6OtherConfigurations + + default: + configuration = DHCPv6NoConfiguration + } + + if ndp.dhcpv6Configuration != configuration { + ndp.dhcpv6Configuration = configuration + ndpDisp.OnDHCPv6Configuration(ndp.ep.nic.ID(), configuration) + } + } + + // Is the IPv6 endpoint configured to discover default routers? + if ndp.configs.DiscoverDefaultRouters { + prf := ra.DefaultRouterPreference() + if prf == header.ReservedRoutePreference { + // As per RFC 4191 section 2.2, + // + // Prf (Default Router Preference) + // + // If the Reserved (10) value is received, the receiver MUST treat the + // value as if it were (00). + // + // Note that the value 00 is the medium (default) router preference value. + prf = header.MediumRoutePreference + } + + // We represent default routers with a default (off-link) route through the + // router. + ndp.handleOffLinkRouteDiscovery(offLinkRoute{dest: header.IPv6EmptySubnet, router: ip}, ra.RouterLifetime(), prf) + } + + // TODO(b/141556115): Do (RetransTimer, ReachableTime)) Parameter + // Discovery. + + // We know the options is valid as far as wire format is concerned since + // we got the Router Advertisement, as documented by this fn. Given this + // we do not check the iterator for errors on calls to Next. + it, _ := ra.Options().Iter(false) + for opt, done, _ := it.Next(); !done; opt, done, _ = it.Next() { + switch opt := opt.(type) { + case header.NDPRecursiveDNSServer: + if ndp.ep.protocol.options.NDPDisp == nil { + continue + } + + addrs, _ := opt.Addresses() + ndp.ep.protocol.options.NDPDisp.OnRecursiveDNSServerOption(ndp.ep.nic.ID(), addrs, opt.Lifetime()) + + case header.NDPDNSSearchList: + if ndp.ep.protocol.options.NDPDisp == nil { + continue + } + + domainNames, _ := opt.DomainNames() + ndp.ep.protocol.options.NDPDisp.OnDNSSearchListOption(ndp.ep.nic.ID(), domainNames, opt.Lifetime()) + + case header.NDPPrefixInformation: + prefix := opt.Subnet() + + // Is the prefix a link-local? + if header.IsV6LinkLocalUnicastAddress(prefix.ID()) { + // ...Yes, skip as per RFC 4861 section 6.3.4, + // and RFC 4862 section 5.5.3.b (for SLAAC). + continue + } + + // Is the Prefix Length 0? + if prefix.Prefix() == 0 { + // ...Yes, skip as this is an invalid prefix + // as all IPv6 addresses cannot be on-link. + continue + } + + if opt.OnLinkFlag() { + ndp.handleOnLinkPrefixInformation(opt) + } + + if opt.AutonomousAddressConfigurationFlag() { + ndp.handleAutonomousPrefixInformation(opt) + } + + case header.NDPRouteInformation: + if !ndp.configs.DiscoverMoreSpecificRoutes { + continue + } + + dest, err := opt.Prefix() + if err != nil { + panic(fmt.Sprintf("%T.Prefix(): %s", opt, err)) + } + + prf := opt.RoutePreference() + if prf == header.ReservedRoutePreference { + // As per RFC 4191 section 2.3, + // + // Prf (Route Preference) + // 2-bit signed integer. The Route Preference indicates + // whether to prefer the router associated with this prefix + // over others, when multiple identical prefixes (for + // different routers) have been received. If the Reserved + // (10) value is received, the Route Information Option MUST + // be ignored. + continue + } + + ndp.handleOffLinkRouteDiscovery(offLinkRoute{dest: dest, router: ip}, opt.RouteLifetime(), prf) + } + + // TODO(b/141556115): Do (MTU) Parameter Discovery. + } +} + +// invalidateOffLinkRoute invalidates a discovered off-link route. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) invalidateOffLinkRoute(route offLinkRoute) { + state, ok := ndp.offLinkRoutes[route] + if !ok { + return + } + + state.invalidationJob.Cancel() + delete(ndp.offLinkRoutes, route) + + // Let the integrator know a discovered off-link route is invalidated. + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnOffLinkRouteInvalidated(ndp.ep.nic.ID(), route.dest, route.router) + } +} + +// handleOffLinkRouteDiscovery handles the discovery of an off-link route. +// +// Precondition: ndp.ep.mu must be locked. +func (ndp *ndpState) handleOffLinkRouteDiscovery(route offLinkRoute, lifetime time.Duration, prf header.NDPRoutePreference) { + ndpDisp := ndp.ep.protocol.options.NDPDisp + if ndpDisp == nil { + return + } + + state, ok := ndp.offLinkRoutes[route] + switch { + case !ok && lifetime != 0: + // This is a new route we are discovering. + // + // Only remember it if we currently know about less than + // MaxDiscoveredOffLinkRoutes routers. + if len(ndp.offLinkRoutes) < MaxDiscoveredOffLinkRoutes { + // Inform the integrator when we discovered an off-link route. + ndpDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), route.dest, route.router, prf) + + state := offLinkRouteState{ + prf: prf, + invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + ndp.invalidateOffLinkRoute(route) + }), + } + + state.invalidationJob.Schedule(lifetime) + + ndp.offLinkRoutes[route] = state + } + + case ok && lifetime != 0: + // This is an already discovered off-link route. Update the lifetime. + state.invalidationJob.Cancel() + state.invalidationJob.Schedule(lifetime) + + if prf != state.prf { + state.prf = prf + + // Inform the integrator about route preference updates. + ndpDisp.OnOffLinkRouteUpdated(ndp.ep.nic.ID(), route.dest, route.router, prf) + } + + ndp.offLinkRoutes[route] = state + + case ok && lifetime == 0: + // The already discovered off-link route is no longer considered valid so we + // invalidate it immediately. + ndp.invalidateOffLinkRoute(route) + } +} + +// rememberOnLinkPrefix remembers a newly discovered on-link prefix with IPv6 +// address with prefix prefix with lifetime l. +// +// The prefix identified by prefix MUST NOT already be known. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration) { + ndpDisp := ndp.ep.protocol.options.NDPDisp + if ndpDisp == nil { + return + } + + // Inform the integrator when we discovered an on-link prefix. + ndpDisp.OnOnLinkPrefixDiscovered(ndp.ep.nic.ID(), prefix) + + state := onLinkPrefixState{ + invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + ndp.invalidateOnLinkPrefix(prefix) + }), + } + + if l < header.NDPInfiniteLifetime { + state.invalidationJob.Schedule(l) + } + + ndp.onLinkPrefixes[prefix] = state +} + +// invalidateOnLinkPrefix invalidates a discovered on-link prefix. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) { + s, ok := ndp.onLinkPrefixes[prefix] + + // Is the on-link prefix still discovered? + if !ok { + // ...Nope, do nothing further. + return + } + + s.invalidationJob.Cancel() + delete(ndp.onLinkPrefixes, prefix) + + // Let the integrator know a discovered on-link prefix is invalidated. + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnOnLinkPrefixInvalidated(ndp.ep.nic.ID(), prefix) + } +} + +// handleOnLinkPrefixInformation handles a Prefix Information option with +// its on-link flag set, as per RFC 4861 section 6.3.4. +// +// handleOnLinkPrefixInformation assumes that the prefix this pi is for is +// not the link-local prefix and the on-link flag is set. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) handleOnLinkPrefixInformation(pi header.NDPPrefixInformation) { + prefix := pi.Subnet() + prefixState, ok := ndp.onLinkPrefixes[prefix] + vl := pi.ValidLifetime() + + if !ok && vl == 0 { + // Don't know about this prefix but it has a zero valid + // lifetime, so just ignore. + return + } + + if !ok && vl != 0 { + // This is a new on-link prefix we are discovering + // + // Only remember it if we currently know about less than + // MaxDiscoveredOnLinkPrefixes on-link prefixes. + if ndp.configs.DiscoverOnLinkPrefixes && len(ndp.onLinkPrefixes) < MaxDiscoveredOnLinkPrefixes { + ndp.rememberOnLinkPrefix(prefix, vl) + } + return + } + + if ok && vl == 0 { + // We know about the on-link prefix, but it is + // no longer to be considered on-link, so + // invalidate it. + ndp.invalidateOnLinkPrefix(prefix) + return + } + + // This is an already discovered on-link prefix with a + // new non-zero valid lifetime. + // + // Update the invalidation job. + + prefixState.invalidationJob.Cancel() + + if vl < header.NDPInfiniteLifetime { + // Prefix is valid for a finite lifetime, schedule the job to execute after + // the new valid lifetime. + prefixState.invalidationJob.Schedule(vl) + } + + ndp.onLinkPrefixes[prefix] = prefixState +} + +// handleAutonomousPrefixInformation handles a Prefix Information option with +// its autonomous flag set, as per RFC 4862 section 5.5.3. +// +// handleAutonomousPrefixInformation assumes that the prefix this pi is for is +// not the link-local prefix and the autonomous flag is set. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) handleAutonomousPrefixInformation(pi header.NDPPrefixInformation) { + vl := pi.ValidLifetime() + pl := pi.PreferredLifetime() + + // If the preferred lifetime is greater than the valid lifetime, + // silently ignore the Prefix Information option, as per RFC 4862 + // section 5.5.3.c. + if pl > vl { + return + } + + prefix := pi.Subnet() + + // Check if we already maintain SLAAC state for prefix. + if state, ok := ndp.slaacPrefixes[prefix]; ok { + // As per RFC 4862 section 5.5.3.e, refresh prefix's SLAAC lifetimes. + ndp.refreshSLAACPrefixLifetimes(prefix, &state, pl, vl) + ndp.slaacPrefixes[prefix] = state + return + } + + // prefix is a new SLAAC prefix. Do the work as outlined by RFC 4862 section + // 5.5.3.d if ndp is configured to auto-generate new addresses via SLAAC. + if !ndp.configs.AutoGenGlobalAddresses { + return + } + + ndp.doSLAAC(prefix, pl, vl) +} + +// doSLAAC generates a new SLAAC address with the provided lifetimes +// for prefix. +// +// pl is the new preferred lifetime. vl is the new valid lifetime. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) { + // If we do not already have an address for this prefix and the valid + // lifetime is 0, no need to do anything further, as per RFC 4862 + // section 5.5.3.d. + if vl == 0 { + return + } + + // Make sure the prefix is valid (as far as its length is concerned) to + // generate a valid IPv6 address from an interface identifier (IID), as + // per RFC 4862 sectiion 5.5.3.d. + if prefix.Prefix() != validPrefixLenForAutoGen { + return + } + + state := slaacPrefixState{ + deprecationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + state, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the deprecated SLAAC prefix %s", prefix)) + } + + ndp.deprecateSLAACAddress(state.stableAddr.addressEndpoint) + }), + invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + state, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the invalidated SLAAC prefix %s", prefix)) + } + + ndp.invalidateSLAACPrefix(prefix, state) + }), + tempAddrs: make(map[tcpip.Address]tempSLAACAddrState), + maxGenerationAttempts: ndp.configs.AutoGenAddressConflictRetries + 1, + } + + now := ndp.ep.protocol.stack.Clock().NowMonotonic() + + // The time an address is preferred until is needed to properly generate the + // address. + if pl < header.NDPInfiniteLifetime { + t := now.Add(pl) + state.preferredUntil = &t + } + + if !ndp.generateSLAACAddr(prefix, &state) { + // We were unable to generate an address for the prefix, we do not nothing + // further as there is no reason to maintain state or jobs for a prefix we + // do not have an address for. + return + } + + // Setup the initial jobs to deprecate and invalidate prefix. + + if pl < header.NDPInfiniteLifetime && pl != 0 { + state.deprecationJob.Schedule(pl) + } + + if vl < header.NDPInfiniteLifetime { + state.invalidationJob.Schedule(vl) + t := now.Add(vl) + state.validUntil = &t + } + + // If the address is assigned (DAD resolved), generate a temporary address. + if state.stableAddr.addressEndpoint.GetKind() == stack.Permanent { + // Reset the generation attempts counter as we are starting the generation + // of a new address for the SLAAC prefix. + ndp.generateTempSLAACAddr(prefix, &state, true /* resetGenAttempts */) + } + + ndp.slaacPrefixes[prefix] = state +} + +// addAndAcquireSLAACAddr adds a SLAAC address to the IPv6 endpoint. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) addAndAcquireSLAACAddr(addr tcpip.AddressWithPrefix, configType stack.AddressConfigType, deprecated bool) stack.AddressEndpoint { + // Inform the integrator that we have a new SLAAC address. + ndpDisp := ndp.ep.protocol.options.NDPDisp + if ndpDisp == nil { + return nil + } + + addressEndpoint, err := ndp.ep.addAndAcquirePermanentAddressLocked(addr, stack.AddressProperties{ + PEB: stack.FirstPrimaryEndpoint, + ConfigType: configType, + Deprecated: deprecated, + }) + if err != nil { + panic(fmt.Sprintf("ndp: error when adding SLAAC address %+v: %s", addr, err)) + } + + ndpDisp.OnAutoGenAddress(ndp.ep.nic.ID(), addr) + + return addressEndpoint +} + +// generateSLAACAddr generates a SLAAC address for prefix. +// +// Returns true if an address was successfully generated. +// +// Panics if the prefix is not a SLAAC prefix or it already has an address. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixState) bool { + if addressEndpoint := state.stableAddr.addressEndpoint; addressEndpoint != nil { + panic(fmt.Sprintf("ndp: SLAAC prefix %s already has a permenant address %s", prefix, addressEndpoint.AddressWithPrefix())) + } + + // If we have already reached the maximum address generation attempts for the + // prefix, do not generate another address. + if state.generationAttempts == state.maxGenerationAttempts { + return false + } + + var generatedAddr tcpip.AddressWithPrefix + addrBytes := []byte(prefix.ID()) + + for i := 0; ; i++ { + // If we were unable to generate an address after the maximum SLAAC address + // local regeneration attempts, do nothing further. + if i == maxSLAACAddrLocalRegenAttempts { + return false + } + + dadCounter := state.generationAttempts + state.stableAddr.localGenerationFailures + if oIID := ndp.ep.protocol.options.OpaqueIIDOpts; oIID.NICNameFromID != nil { + addrBytes = header.AppendOpaqueInterfaceIdentifier( + addrBytes[:header.IIDOffsetInIPv6Address], + prefix, + oIID.NICNameFromID(ndp.ep.nic.ID(), ndp.ep.nic.Name()), + dadCounter, + oIID.SecretKey, + ) + } else if dadCounter == 0 { + // Modified-EUI64 based IIDs have no way to resolve DAD conflicts, so if + // the DAD counter is non-zero, we cannot use this method. + // + // Only attempt to generate an interface-specific IID if we have a valid + // link address. + // + // TODO(b/141011931): Validate a LinkEndpoint's link address (provided by + // LinkEndpoint.LinkAddress) before reaching this point. + linkAddr := ndp.ep.nic.LinkAddress() + if !header.IsValidUnicastEthernetAddress(linkAddr) { + return false + } + + // Generate an address within prefix from the modified EUI-64 of ndp's + // NIC's Ethernet MAC address. + header.EthernetAdddressToModifiedEUI64IntoBuf(linkAddr, addrBytes[header.IIDOffsetInIPv6Address:]) + } else { + // We have no way to regenerate an address in response to an address + // conflict when addresses are not generated with opaque IIDs. + return false + } + + generatedAddr = tcpip.AddressWithPrefix{ + Address: tcpip.Address(addrBytes), + PrefixLen: validPrefixLenForAutoGen, + } + + if !ndp.ep.hasPermanentAddressRLocked(generatedAddr.Address) { + break + } + + state.stableAddr.localGenerationFailures++ + } + + deprecated := state.preferredUntil != nil && !state.preferredUntil.After(ndp.ep.protocol.stack.Clock().NowMonotonic()) + if addressEndpoint := ndp.addAndAcquireSLAACAddr(generatedAddr, stack.AddressConfigSlaac, deprecated); addressEndpoint != nil { + state.stableAddr.addressEndpoint = addressEndpoint + state.generationAttempts++ + return true + } + + return false +} + +// regenerateSLAACAddr regenerates an address for a SLAAC prefix. +// +// If generating a new address for the prefix fails, the prefix is invalidated. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) regenerateSLAACAddr(prefix tcpip.Subnet) { + state, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: SLAAC prefix state not found to regenerate address for %s", prefix)) + } + + if ndp.generateSLAACAddr(prefix, &state) { + ndp.slaacPrefixes[prefix] = state + return + } + + // We were unable to generate a permanent address for the SLAAC prefix so + // invalidate the prefix as there is no reason to maintain state for a + // SLAAC prefix we do not have an address for. + ndp.invalidateSLAACPrefix(prefix, state) +} + +// generateTempSLAACAddr generates a new temporary SLAAC address. +// +// If resetGenAttempts is true, the prefix's generation counter is reset. +// +// Returns true if a new address was generated. +func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *slaacPrefixState, resetGenAttempts bool) bool { + // Are we configured to auto-generate new temporary global addresses for the + // prefix? + if !ndp.configs.AutoGenTempGlobalAddresses || prefix == header.IPv6LinkLocalPrefix.Subnet() { + return false + } + + if resetGenAttempts { + prefixState.generationAttempts = 0 + prefixState.maxGenerationAttempts = ndp.configs.AutoGenAddressConflictRetries + 1 + } + + // If we have already reached the maximum address generation attempts for the + // prefix, do not generate another address. + if prefixState.generationAttempts == prefixState.maxGenerationAttempts { + return false + } + + stableAddr := prefixState.stableAddr.addressEndpoint.AddressWithPrefix().Address + now := ndp.ep.protocol.stack.Clock().NowMonotonic() + + // As per RFC 4941 section 3.3 step 4, the valid lifetime of a temporary + // address is the lower of the valid lifetime of the stable address or the + // maximum temporary address valid lifetime. + vl := ndp.configs.MaxTempAddrValidLifetime + if prefixState.validUntil != nil { + if prefixVL := prefixState.validUntil.Sub(now); vl > prefixVL { + vl = prefixVL + } + } + + if vl <= 0 { + // Cannot create an address without a valid lifetime. + return false + } + + // As per RFC 4941 section 3.3 step 4, the preferred lifetime of a temporary + // address is the lower of the preferred lifetime of the stable address or the + // maximum temporary address preferred lifetime - the temporary address desync + // factor. + pl := ndp.configs.MaxTempAddrPreferredLifetime - ndp.temporaryAddressDesyncFactor + if prefixState.preferredUntil != nil { + if prefixPL := prefixState.preferredUntil.Sub(now); pl > prefixPL { + // Respect the preferred lifetime of the prefix, as per RFC 4941 section + // 3.3 step 4. + pl = prefixPL + } + } + + // As per RFC 4941 section 3.3 step 5, a temporary address is created only if + // the calculated preferred lifetime is greater than the advance regeneration + // duration. In particular, we MUST NOT create a temporary address with a zero + // Preferred Lifetime. + if pl <= ndp.configs.RegenAdvanceDuration { + return false + } + + // Attempt to generate a new address that is not already assigned to the IPv6 + // endpoint. + var generatedAddr tcpip.AddressWithPrefix + for i := 0; ; i++ { + // If we were unable to generate an address after the maximum SLAAC address + // local regeneration attempts, do nothing further. + if i == maxSLAACAddrLocalRegenAttempts { + return false + } + + generatedAddr = header.GenerateTempIPv6SLAACAddr(ndp.temporaryIIDHistory[:], stableAddr) + if !ndp.ep.hasPermanentAddressRLocked(generatedAddr.Address) { + break + } + } + + // As per RFC RFC 4941 section 3.3 step 5, we MUST NOT create a temporary + // address with a zero preferred lifetime. The checks above ensure this + // so we know the address is not deprecated. + addressEndpoint := ndp.addAndAcquireSLAACAddr(generatedAddr, stack.AddressConfigSlaacTemp, false /* deprecated */) + if addressEndpoint == nil { + return false + } + + state := tempSLAACAddrState{ + deprecationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + prefixState, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to deprecate temporary address %s", prefix, generatedAddr)) + } + + tempAddrState, ok := prefixState.tempAddrs[generatedAddr.Address] + if !ok { + panic(fmt.Sprintf("ndp: must have a tempAddr entry to deprecate temporary address %s", generatedAddr)) + } + + ndp.deprecateSLAACAddress(tempAddrState.addressEndpoint) + }), + invalidationJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + prefixState, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to invalidate temporary address %s", prefix, generatedAddr)) + } + + tempAddrState, ok := prefixState.tempAddrs[generatedAddr.Address] + if !ok { + panic(fmt.Sprintf("ndp: must have a tempAddr entry to invalidate temporary address %s", generatedAddr)) + } + + ndp.invalidateTempSLAACAddr(prefixState.tempAddrs, generatedAddr.Address, tempAddrState) + }), + regenJob: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() { + prefixState, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to regenerate temporary address after %s", prefix, generatedAddr)) + } + + tempAddrState, ok := prefixState.tempAddrs[generatedAddr.Address] + if !ok { + panic(fmt.Sprintf("ndp: must have a tempAddr entry to regenerate temporary address after %s", generatedAddr)) + } + + // If an address has already been regenerated for this address, don't + // regenerate another address. + if tempAddrState.regenerated { + return + } + + // Reset the generation attempts counter as we are starting the generation + // of a new address for the SLAAC prefix. + tempAddrState.regenerated = ndp.generateTempSLAACAddr(prefix, &prefixState, true /* resetGenAttempts */) + prefixState.tempAddrs[generatedAddr.Address] = tempAddrState + ndp.slaacPrefixes[prefix] = prefixState + }), + createdAt: now, + addressEndpoint: addressEndpoint, + } + + state.deprecationJob.Schedule(pl) + state.invalidationJob.Schedule(vl) + state.regenJob.Schedule(pl - ndp.configs.RegenAdvanceDuration) + + prefixState.generationAttempts++ + prefixState.tempAddrs[generatedAddr.Address] = state + + return true +} + +// regenerateTempSLAACAddr regenerates a temporary address for a SLAAC prefix. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) regenerateTempSLAACAddr(prefix tcpip.Subnet, resetGenAttempts bool) { + state, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: SLAAC prefix state not found to regenerate temporary address for %s", prefix)) + } + + ndp.generateTempSLAACAddr(prefix, &state, resetGenAttempts) + ndp.slaacPrefixes[prefix] = state +} + +// refreshSLAACPrefixLifetimes refreshes the lifetimes of a SLAAC prefix. +// +// pl is the new preferred lifetime. vl is the new valid lifetime. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixState *slaacPrefixState, pl, vl time.Duration) { + // If the preferred lifetime is zero, then the prefix should be deprecated. + deprecated := pl == 0 + if deprecated { + ndp.deprecateSLAACAddress(prefixState.stableAddr.addressEndpoint) + } else { + prefixState.stableAddr.addressEndpoint.SetDeprecated(false) + } + + // If prefix was preferred for some finite lifetime before, cancel the + // deprecation job so it can be reset. + prefixState.deprecationJob.Cancel() + + now := ndp.ep.protocol.stack.Clock().NowMonotonic() + + // Schedule the deprecation job if prefix has a finite preferred lifetime. + if pl < header.NDPInfiniteLifetime { + if !deprecated { + prefixState.deprecationJob.Schedule(pl) + } + t := now.Add(pl) + prefixState.preferredUntil = &t + } else { + prefixState.preferredUntil = nil + } + + // As per RFC 4862 section 5.5.3.e, update the valid lifetime for prefix: + // + // 1) If the received Valid Lifetime is greater than 2 hours or greater than + // RemainingLifetime, set the valid lifetime of the prefix to the + // advertised Valid Lifetime. + // + // 2) If RemainingLifetime is less than or equal to 2 hours, ignore the + // advertised Valid Lifetime. + // + // 3) Otherwise, reset the valid lifetime of the prefix to 2 hours. + + if vl >= header.NDPInfiniteLifetime { + // Handle the infinite valid lifetime separately as we do not schedule a + // job in this case. + prefixState.invalidationJob.Cancel() + prefixState.validUntil = nil + } else { + var effectiveVl time.Duration + var rl time.Duration + + // If the prefix was originally set to be valid forever, assume the + // remaining time to be the maximum possible value. + if prefixState.validUntil == nil { + rl = header.NDPInfiniteLifetime + } else { + rl = prefixState.validUntil.Sub(now) + } + + if vl > MinPrefixInformationValidLifetimeForUpdate || vl > rl { + effectiveVl = vl + } else if rl > MinPrefixInformationValidLifetimeForUpdate { + effectiveVl = MinPrefixInformationValidLifetimeForUpdate + } + + if effectiveVl != 0 { + prefixState.invalidationJob.Cancel() + prefixState.invalidationJob.Schedule(effectiveVl) + t := now.Add(effectiveVl) + prefixState.validUntil = &t + } + } + + // If DAD is not yet complete on the stable address, there is no need to do + // work with temporary addresses. + if prefixState.stableAddr.addressEndpoint.GetKind() != stack.Permanent { + return + } + + // Note, we do not need to update the entries in the temporary address map + // after updating the jobs because the jobs are held as pointers. + var regenForAddr tcpip.Address + allAddressesRegenerated := true + for tempAddr, tempAddrState := range prefixState.tempAddrs { + // As per RFC 4941 section 3.3 step 4, the valid lifetime of a temporary + // address is the lower of the valid lifetime of the stable address or the + // maximum temporary address valid lifetime. Note, the valid lifetime of a + // temporary address is relative to the address's creation time. + validUntil := tempAddrState.createdAt.Add(ndp.configs.MaxTempAddrValidLifetime) + if prefixState.validUntil != nil && prefixState.validUntil.Before(validUntil) { + validUntil = *prefixState.validUntil + } + + // If the address is no longer valid, invalidate it immediately. Otherwise, + // reset the invalidation job. + newValidLifetime := validUntil.Sub(now) + if newValidLifetime <= 0 { + ndp.invalidateTempSLAACAddr(prefixState.tempAddrs, tempAddr, tempAddrState) + continue + } + tempAddrState.invalidationJob.Cancel() + tempAddrState.invalidationJob.Schedule(newValidLifetime) + + // As per RFC 4941 section 3.3 step 4, the preferred lifetime of a temporary + // address is the lower of the preferred lifetime of the stable address or + // the maximum temporary address preferred lifetime - the temporary address + // desync factor. Note, the preferred lifetime of a temporary address is + // relative to the address's creation time. + preferredUntil := tempAddrState.createdAt.Add(ndp.configs.MaxTempAddrPreferredLifetime - ndp.temporaryAddressDesyncFactor) + if prefixState.preferredUntil != nil && prefixState.preferredUntil.Before(preferredUntil) { + preferredUntil = *prefixState.preferredUntil + } + + // If the address is no longer preferred, deprecate it immediately. + // Otherwise, schedule the deprecation job again. + newPreferredLifetime := preferredUntil.Sub(now) + tempAddrState.deprecationJob.Cancel() + + if newPreferredLifetime <= 0 { + ndp.deprecateSLAACAddress(tempAddrState.addressEndpoint) + } else { + tempAddrState.addressEndpoint.SetDeprecated(false) + tempAddrState.deprecationJob.Schedule(newPreferredLifetime) + } + + tempAddrState.regenJob.Cancel() + if tempAddrState.regenerated { + } else { + allAddressesRegenerated = false + + if newPreferredLifetime <= ndp.configs.RegenAdvanceDuration { + // The new preferred lifetime is less than the advance regeneration + // duration so regenerate an address for this temporary address + // immediately after we finish iterating over the temporary addresses. + regenForAddr = tempAddr + } else { + tempAddrState.regenJob.Schedule(newPreferredLifetime - ndp.configs.RegenAdvanceDuration) + } + } + } + + // Generate a new temporary address if all of the existing temporary addresses + // have been regenerated, or we need to immediately regenerate an address + // due to an update in preferred lifetime. + // + // If each temporay address has already been regenerated, no new temporary + // address is generated. To ensure continuation of temporary SLAAC addresses, + // we manually try to regenerate an address here. + if len(regenForAddr) != 0 || allAddressesRegenerated { + // Reset the generation attempts counter as we are starting the generation + // of a new address for the SLAAC prefix. + if state, ok := prefixState.tempAddrs[regenForAddr]; ndp.generateTempSLAACAddr(prefix, prefixState, true /* resetGenAttempts */) && ok { + state.regenerated = true + prefixState.tempAddrs[regenForAddr] = state + } + } +} + +// deprecateSLAACAddress marks the address as deprecated and notifies the NDP +// dispatcher that address has been deprecated. +// +// deprecateSLAACAddress does nothing if the address is already deprecated. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) deprecateSLAACAddress(addressEndpoint stack.AddressEndpoint) { + if addressEndpoint.Deprecated() { + return + } + + addressEndpoint.SetDeprecated(true) + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnAutoGenAddressDeprecated(ndp.ep.nic.ID(), addressEndpoint.AddressWithPrefix()) + } +} + +// invalidateSLAACPrefix invalidates a SLAAC prefix. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) invalidateSLAACPrefix(prefix tcpip.Subnet, state slaacPrefixState) { + ndp.cleanupSLAACPrefixResources(prefix, state) + + if addressEndpoint := state.stableAddr.addressEndpoint; addressEndpoint != nil { + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addressEndpoint.AddressWithPrefix()) + } + + if err := ndp.ep.removePermanentEndpointInnerLocked(addressEndpoint, &stack.DADAborted{}); err != nil { + panic(fmt.Sprintf("ndp: error removing stable SLAAC address %s: %s", addressEndpoint.AddressWithPrefix(), err)) + } + } +} + +// cleanupSLAACAddrResourcesAndNotify cleans up an invalidated SLAAC address's +// resources. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix, invalidatePrefix bool) { + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addr) + } + + prefix := addr.Subnet() + state, ok := ndp.slaacPrefixes[prefix] + if !ok || state.stableAddr.addressEndpoint == nil || addr.Address != state.stableAddr.addressEndpoint.AddressWithPrefix().Address { + return + } + + if !invalidatePrefix { + // If the prefix is not being invalidated, disassociate the address from the + // prefix and do nothing further. + state.stableAddr.addressEndpoint.DecRef() + state.stableAddr.addressEndpoint = nil + ndp.slaacPrefixes[prefix] = state + return + } + + ndp.cleanupSLAACPrefixResources(prefix, state) +} + +// cleanupSLAACPrefixResources cleans up a SLAAC prefix's jobs and entry. +// +// Panics if the SLAAC prefix is not known. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) cleanupSLAACPrefixResources(prefix tcpip.Subnet, state slaacPrefixState) { + // Invalidate all temporary addresses. + for tempAddr, tempAddrState := range state.tempAddrs { + ndp.invalidateTempSLAACAddr(state.tempAddrs, tempAddr, tempAddrState) + } + + if state.stableAddr.addressEndpoint != nil { + state.stableAddr.addressEndpoint.DecRef() + state.stableAddr.addressEndpoint = nil + } + state.deprecationJob.Cancel() + state.invalidationJob.Cancel() + delete(ndp.slaacPrefixes, prefix) +} + +// invalidateTempSLAACAddr invalidates a temporary SLAAC address. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) invalidateTempSLAACAddr(tempAddrs map[tcpip.Address]tempSLAACAddrState, tempAddr tcpip.Address, tempAddrState tempSLAACAddrState) { + ndp.cleanupTempSLAACAddrResourcesAndNotifyInner(tempAddrs, tempAddr, tempAddrState) + + if err := ndp.ep.removePermanentEndpointInnerLocked(tempAddrState.addressEndpoint, &stack.DADAborted{}); err != nil { + panic(fmt.Sprintf("error removing temporary SLAAC address %s: %s", tempAddrState.addressEndpoint.AddressWithPrefix(), err)) + } +} + +// cleanupTempSLAACAddrResourcesAndNotify cleans up an invalidated temporary +// SLAAC address's resources from ndp and notifies the NDP dispatcher that the +// address was invalidated. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix) { + prefix := addr.Subnet() + state, ok := ndp.slaacPrefixes[prefix] + if !ok { + panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry to clean up temp addr %s resources", addr)) + } + + tempAddrState, ok := state.tempAddrs[addr.Address] + if !ok { + panic(fmt.Sprintf("ndp: must have a tempAddr entry to clean up temp addr %s resources", addr)) + } + + ndp.cleanupTempSLAACAddrResourcesAndNotifyInner(state.tempAddrs, addr.Address, tempAddrState) +} + +// cleanupTempSLAACAddrResourcesAndNotifyInner is like +// cleanupTempSLAACAddrResourcesAndNotify except it does not lookup the +// temporary address's state in ndp - it assumes the passed state is valid. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotifyInner(tempAddrs map[tcpip.Address]tempSLAACAddrState, tempAddr tcpip.Address, tempAddrState tempSLAACAddrState) { + if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil { + ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), tempAddrState.addressEndpoint.AddressWithPrefix()) + } + + tempAddrState.addressEndpoint.DecRef() + tempAddrState.addressEndpoint = nil + tempAddrState.deprecationJob.Cancel() + tempAddrState.invalidationJob.Cancel() + tempAddrState.regenJob.Cancel() + delete(tempAddrs, tempAddr) +} + +// cleanupState cleans up ndp's state. +// +// This function invalidates all discovered on-link prefixes, discovered +// routers, and auto-generated addresses. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) cleanupState() { + for prefix, state := range ndp.slaacPrefixes { + ndp.invalidateSLAACPrefix(prefix, state) + } + + for prefix := range ndp.onLinkPrefixes { + ndp.invalidateOnLinkPrefix(prefix) + } + + if got := len(ndp.onLinkPrefixes); got != 0 { + panic(fmt.Sprintf("ndp: still have discovered on-link prefixes after cleaning up; found = %d", got)) + } + + for route := range ndp.offLinkRoutes { + ndp.invalidateOffLinkRoute(route) + } + + if got := len(ndp.offLinkRoutes); got != 0 { + panic(fmt.Sprintf("ndp: still have discovered off-link routes after cleaning up; found = %d", got)) + } + + ndp.dhcpv6Configuration = 0 +} + +// startSolicitingRouters starts soliciting routers, as per RFC 4861 section +// 6.3.7. If routers are already being solicited, this function does nothing. +// +// If ndp is not configured to handle Router Advertisements, routers will not +// be solicited as there is no point soliciting routers if we don't handle their +// advertisements. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) startSolicitingRouters() { + if ndp.rtrSolicitTimer.timer != nil { + // We are already soliciting routers. + return + } + + remaining := ndp.configs.MaxRtrSolicitations + if remaining == 0 { + return + } + + if !ndp.configs.HandleRAs.enabled(ndp.ep.Forwarding()) { + return + } + + // Calculate the random delay before sending our first RS, as per RFC + // 4861 section 6.3.7. + var delay time.Duration + if ndp.configs.MaxRtrSolicitationDelay > 0 { + delay = time.Duration(ndp.ep.protocol.stack.Rand().Int63n(int64(ndp.configs.MaxRtrSolicitationDelay))) + } + + // Protected by ndp.ep.mu. + done := false + + ndp.rtrSolicitTimer = timer{ + done: &done, + timer: ndp.ep.protocol.stack.Clock().AfterFunc(delay, func() { + // As per RFC 4861 section 4.1: + // + // IP Fields: + // Source Address + // An IP address assigned to the sending interface, or + // the unspecified address if no address is assigned + // to the sending interface. + localAddr := header.IPv6Any + if addressEndpoint := ndp.ep.AcquireOutgoingPrimaryAddress(header.IPv6AllRoutersLinkLocalMulticastAddress, false); addressEndpoint != nil { + localAddr = addressEndpoint.AddressWithPrefix().Address + addressEndpoint.DecRef() + } + + // As per RFC 4861 section 4.1, an NDP RS SHOULD include the source + // link-layer address option if the source address of the NDP RS is + // specified. This option MUST NOT be included if the source address is + // unspecified. + // + // TODO(b/141011931): Validate a LinkEndpoint's link address (provided by + // LinkEndpoint.LinkAddress) before reaching this point. + var optsSerializer header.NDPOptionsSerializer + linkAddress := ndp.ep.nic.LinkAddress() + if localAddr != header.IPv6Any && header.IsValidUnicastEthernetAddress(linkAddress) { + optsSerializer = header.NDPOptionsSerializer{ + header.NDPSourceLinkLayerAddressOption(linkAddress), + } + } + payloadSize := header.ICMPv6HeaderSize + header.NDPRSMinimumSize + optsSerializer.Length() + icmpData := header.ICMPv6(buffer.NewView(payloadSize)) + icmpData.SetType(header.ICMPv6RouterSolicit) + rs := header.NDPRouterSolicit(icmpData.MessageBody()) + rs.Options().Serialize(optsSerializer) + icmpData.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: icmpData, + Src: localAddr, + Dst: header.IPv6AllRoutersLinkLocalMulticastAddress, + })) + + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(ndp.ep.MaxHeaderLength()), + Data: buffer.View(icmpData).ToVectorisedView(), + }) + + sent := ndp.ep.stats.icmp.packetsSent + if err := addIPHeader(localAddr, header.IPv6AllRoutersLinkLocalMulticastAddress, pkt, stack.NetworkHeaderParams{ + Protocol: header.ICMPv6ProtocolNumber, + TTL: header.NDPHopLimit, + }, nil /* extensionHeaders */); err != nil { + panic(fmt.Sprintf("failed to add IP header: %s", err)) + } + + if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersLinkLocalMulticastAddress), ProtocolNumber, pkt); err != nil { + sent.dropped.Increment() + // Don't send any more messages if we had an error. + remaining = 0 + } else { + sent.routerSolicit.Increment() + remaining-- + } + + ndp.ep.mu.Lock() + defer ndp.ep.mu.Unlock() + + if done { + // Router solicitation was stopped. + return + } + + if remaining == 0 { + // We are done soliciting routers. + ndp.stopSolicitingRouters() + return + } + + ndp.rtrSolicitTimer.timer.Reset(ndp.configs.RtrSolicitationInterval) + }), + } +} + +// forwardingChanged handles a change in forwarding configuration. +// +// If transitioning to a host, router solicitation will be started. Otherwise, +// router solicitation will be stopped if NDP is not configured to handle RAs +// as a router. +// +// Precondition: ndp.ep.mu must be locked. +func (ndp *ndpState) forwardingChanged(forwarding bool) { + if forwarding { + if ndp.configs.HandleRAs.enabled(forwarding) { + return + } + + ndp.stopSolicitingRouters() + return + } + + // Solicit routers when transitioning to a host. + // + // If the endpoint is not currently enabled, routers will be solicited when + // the endpoint becomes enabled (if it is still a host). + if ndp.ep.Enabled() { + ndp.startSolicitingRouters() + } +} + +// stopSolicitingRouters stops soliciting routers. If routers are not currently +// being solicited, this function does nothing. +// +// The IPv6 endpoint that ndp belongs to MUST be locked. +func (ndp *ndpState) stopSolicitingRouters() { + if ndp.rtrSolicitTimer.timer == nil { + // Nothing to do. + return + } + + ndp.rtrSolicitTimer.timer.Stop() + *ndp.rtrSolicitTimer.done = true + ndp.rtrSolicitTimer = timer{} +} + +func (ndp *ndpState) init(ep *endpoint, dadOptions ip.DADOptions) { + if ndp.offLinkRoutes != nil { + panic("attempted to initialize NDP state twice") + } + + ndp.ep = ep + ndp.configs = ep.protocol.options.NDPConfigs + ndp.dad.Init(&ndp.ep.mu, ep.protocol.options.DADConfigs, dadOptions) + ndp.offLinkRoutes = make(map[offLinkRoute]offLinkRouteState) + ndp.onLinkPrefixes = make(map[tcpip.Subnet]onLinkPrefixState) + ndp.slaacPrefixes = make(map[tcpip.Subnet]slaacPrefixState) + + header.InitialTempIID(ndp.temporaryIIDHistory[:], ndp.ep.protocol.options.TempIIDSeed, ndp.ep.nic.ID()) + ndp.temporaryAddressDesyncFactor = time.Duration(ep.protocol.stack.Rand().Int63n(int64(MaxDesyncFactor))) +} + +func (ndp *ndpState) SendDADMessage(addr tcpip.Address, nonce []byte) tcpip.Error { + snmc := header.SolicitedNodeAddr(addr) + return ndp.ep.sendNDPNS(header.IPv6Any, snmc, addr, header.EthernetAddressFromMulticastIPv6Address(snmc), header.NDPOptionsSerializer{ + header.NDPNonceOption(nonce), + }) +} + +func (e *endpoint) sendNDPNS(srcAddr, dstAddr, targetAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, opts header.NDPOptionsSerializer) tcpip.Error { + icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize + opts.Length())) + icmp.SetType(header.ICMPv6NeighborSolicit) + ns := header.NDPNeighborSolicit(icmp.MessageBody()) + ns.SetTargetAddress(targetAddr) + ns.Options().Serialize(opts) + icmp.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: icmp, + Src: srcAddr, + Dst: dstAddr, + })) + + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(e.MaxHeaderLength()), + Data: buffer.View(icmp).ToVectorisedView(), + }) + + if err := addIPHeader(srcAddr, dstAddr, pkt, stack.NetworkHeaderParams{ + Protocol: header.ICMPv6ProtocolNumber, + TTL: header.NDPHopLimit, + }, nil /* extensionHeaders */); err != nil { + panic(fmt.Sprintf("failed to add IP header: %s", err)) + } + + sent := e.stats.icmp.packetsSent + err := e.nic.WritePacketToRemote(remoteLinkAddr, ProtocolNumber, pkt) + if err != nil { + sent.dropped.Increment() + } else { + sent.neighborSolicit.Increment() + } + return err +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/stats.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/stats.go new file mode 100644 index 000000000..2f18f60e8 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv6/stats.go @@ -0,0 +1,136 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6 + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/network/internal/ip" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +var _ stack.IPNetworkEndpointStats = (*Stats)(nil) + +// Stats holds statistics related to the IPv6 protocol family. +type Stats struct { + // IP holds IPv6 statistics. + IP tcpip.IPStats + + // ICMP holds ICMPv6 statistics. + ICMP tcpip.ICMPv6Stats + + // UnhandledRouterAdvertisements is the number of Router Advertisements that + // were observed but not handled. + UnhandledRouterAdvertisements *tcpip.StatCounter +} + +// IsNetworkEndpointStats implements stack.NetworkEndpointStats. +func (*Stats) IsNetworkEndpointStats() {} + +// IPStats implements stack.IPNetworkEndointStats +func (s *Stats) IPStats() *tcpip.IPStats { + return &s.IP +} + +type sharedStats struct { + localStats Stats + ip ip.MultiCounterIPStats + icmp multiCounterICMPv6Stats +} + +// LINT.IfChange(multiCounterICMPv6PacketStats) + +type multiCounterICMPv6PacketStats struct { + echoRequest tcpip.MultiCounterStat + echoReply tcpip.MultiCounterStat + dstUnreachable tcpip.MultiCounterStat + packetTooBig tcpip.MultiCounterStat + timeExceeded tcpip.MultiCounterStat + paramProblem tcpip.MultiCounterStat + routerSolicit tcpip.MultiCounterStat + routerAdvert tcpip.MultiCounterStat + neighborSolicit tcpip.MultiCounterStat + neighborAdvert tcpip.MultiCounterStat + redirectMsg tcpip.MultiCounterStat + multicastListenerQuery tcpip.MultiCounterStat + multicastListenerReport tcpip.MultiCounterStat + multicastListenerDone tcpip.MultiCounterStat +} + +func (m *multiCounterICMPv6PacketStats) init(a, b *tcpip.ICMPv6PacketStats) { + m.echoRequest.Init(a.EchoRequest, b.EchoRequest) + m.echoReply.Init(a.EchoReply, b.EchoReply) + m.dstUnreachable.Init(a.DstUnreachable, b.DstUnreachable) + m.packetTooBig.Init(a.PacketTooBig, b.PacketTooBig) + m.timeExceeded.Init(a.TimeExceeded, b.TimeExceeded) + m.paramProblem.Init(a.ParamProblem, b.ParamProblem) + m.routerSolicit.Init(a.RouterSolicit, b.RouterSolicit) + m.routerAdvert.Init(a.RouterAdvert, b.RouterAdvert) + m.neighborSolicit.Init(a.NeighborSolicit, b.NeighborSolicit) + m.neighborAdvert.Init(a.NeighborAdvert, b.NeighborAdvert) + m.redirectMsg.Init(a.RedirectMsg, b.RedirectMsg) + m.multicastListenerQuery.Init(a.MulticastListenerQuery, b.MulticastListenerQuery) + m.multicastListenerReport.Init(a.MulticastListenerReport, b.MulticastListenerReport) + m.multicastListenerDone.Init(a.MulticastListenerDone, b.MulticastListenerDone) +} + +// LINT.ThenChange(../../tcpip.go:ICMPv6PacketStats) + +// LINT.IfChange(multiCounterICMPv6SentPacketStats) + +type multiCounterICMPv6SentPacketStats struct { + multiCounterICMPv6PacketStats + dropped tcpip.MultiCounterStat + rateLimited tcpip.MultiCounterStat +} + +func (m *multiCounterICMPv6SentPacketStats) init(a, b *tcpip.ICMPv6SentPacketStats) { + m.multiCounterICMPv6PacketStats.init(&a.ICMPv6PacketStats, &b.ICMPv6PacketStats) + m.dropped.Init(a.Dropped, b.Dropped) + m.rateLimited.Init(a.RateLimited, b.RateLimited) +} + +// LINT.ThenChange(../../tcpip.go:ICMPv6SentPacketStats) + +// LINT.IfChange(multiCounterICMPv6ReceivedPacketStats) + +type multiCounterICMPv6ReceivedPacketStats struct { + multiCounterICMPv6PacketStats + unrecognized tcpip.MultiCounterStat + invalid tcpip.MultiCounterStat + routerOnlyPacketsDroppedByHost tcpip.MultiCounterStat +} + +func (m *multiCounterICMPv6ReceivedPacketStats) init(a, b *tcpip.ICMPv6ReceivedPacketStats) { + m.multiCounterICMPv6PacketStats.init(&a.ICMPv6PacketStats, &b.ICMPv6PacketStats) + m.unrecognized.Init(a.Unrecognized, b.Unrecognized) + m.invalid.Init(a.Invalid, b.Invalid) + m.routerOnlyPacketsDroppedByHost.Init(a.RouterOnlyPacketsDroppedByHost, b.RouterOnlyPacketsDroppedByHost) +} + +// LINT.ThenChange(../../tcpip.go:ICMPv6ReceivedPacketStats) + +// LINT.IfChange(multiCounterICMPv6Stats) + +type multiCounterICMPv6Stats struct { + packetsSent multiCounterICMPv6SentPacketStats + packetsReceived multiCounterICMPv6ReceivedPacketStats +} + +func (m *multiCounterICMPv6Stats) init(a, b *tcpip.ICMPv6Stats) { + m.packetsSent.init(&a.PacketsSent, &b.PacketsSent) + m.packetsReceived.init(&a.PacketsReceived, &b.PacketsReceived) +} + +// LINT.ThenChange(../../tcpip.go:ICMPv6Stats) diff --git a/vendor/modules.txt b/vendor/modules.txt index 3427b5364..3e3c83529 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -198,6 +198,7 @@ gvisor.dev/gvisor/pkg/tcpip/network/hash gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation gvisor.dev/gvisor/pkg/tcpip/network/internal/ip gvisor.dev/gvisor/pkg/tcpip/network/ipv4 +gvisor.dev/gvisor/pkg/tcpip/network/ipv6 gvisor.dev/gvisor/pkg/tcpip/ports gvisor.dev/gvisor/pkg/tcpip/seqnum gvisor.dev/gvisor/pkg/tcpip/stack From 8706468c9bb1bed0c7eacaa601d0752c2a9a801b Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 12:16:51 +0200 Subject: [PATCH 5/6] Add basic e2e tests for ipv6 --- test/basic_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/basic_test.go b/test/basic_test.go index 7b7fadfac..51efcecba 100644 --- a/test/basic_test.go +++ b/test/basic_test.go @@ -58,4 +58,24 @@ var _ = Describe("dns", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(string(out)).To(ContainSubstring("Address: 192.168.127.254")) }) + + It("should resolve ipv6", func() { + out, err := sshExec("nslookup ipv6.google.com") + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(out)).To(ContainSubstring("Address: 2a00:1450:4007:810::200e")) + }) +}) + +var _ = Describe("ipv6", func() { + It("tcp should work", func() { + out, err := sshExec("curl ipv6.google.com") + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(out)).To(ContainSubstring("")) + }) + + It("udp should work", func() { + out, err := sshExec("dig ipv6.google.com @2001:4860:4860::8888") + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(out)).To(ContainSubstring("opcode: QUERY, status: NOERROR")) + }) }) From 0e796927e5702a743b1f47527b1a208a6682f193 Mon Sep 17 00:00:00 2001 From: Guillaume Rose Date: Thu, 30 Sep 2021 12:07:21 +0200 Subject: [PATCH 6/6] Hacks for ipv6 --- pkg/tap/link.go | 13 ++++++ pkg/tap/router_advertisement.go | 83 +++++++++++++++++++++++++++++++++ pkg/tap/switch.go | 14 ++++++ 3 files changed, 110 insertions(+) create mode 100644 pkg/tap/router_advertisement.go diff --git a/pkg/tap/link.go b/pkg/tap/link.go index 2b0ad2501..2d61e62bf 100644 --- a/pkg/tap/link.go +++ b/pkg/tap/link.go @@ -9,7 +9,9 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" ) type LinkEndpoint struct { @@ -111,6 +113,17 @@ func (e *LinkEndpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProt } } + if pkt.NetworkProtocolNumber == ipv6.ProtocolNumber && pkt.TransportProtocolNumber == icmp.ProtocolNumber6 { + transportLayer := header.ICMPv6(pkt.TransportHeader().View()) + if transportLayer.Type() == header.ICMPv6NeighborAdvert { + ip := header.NDPNeighborAdvert(transportLayer.MessageBody()).TargetAddress().String() + if ip != "fe80::1" { + log.Debugf("dropping spoofing packets from the gateway about IP %s", ip) + return nil + } + } + } + if e.debug { vv := buffer.NewVectorisedView(pkt.Size(), pkt.Views()) packet := gopacket.NewPacket(vv.ToView(), layers.LayerTypeEthernet, gopacket.Default) diff --git a/pkg/tap/router_advertisement.go b/pkg/tap/router_advertisement.go new file mode 100644 index 000000000..c65469d8d --- /dev/null +++ b/pkg/tap/router_advertisement.go @@ -0,0 +1,83 @@ +package tap + +import ( + "encoding/binary" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" +) + +// raBuf returns a valid NDP Router Advertisement with options, router +// preference and DHCPv6 configurations specified. +func raBuf(src, dst tcpip.LinkAddress, ip tcpip.Address, rl uint16, managedAddress, otherConfigurations bool, prf header.NDPRoutePreference, optSer header.NDPOptionsSerializer) *stack.PacketBuffer { + const flagsByte = 1 + const routerLifetimeOffset = 2 + + icmpSize := header.ICMPv6HeaderSize + header.NDPRAMinimumSize + optSer.Length() + hdr := buffer.NewPrependable(header.EthernetMinimumSize + header.IPv6MinimumSize + icmpSize) + pkt := header.ICMPv6(hdr.Prepend(icmpSize)) + pkt.SetType(header.ICMPv6RouterAdvert) + pkt.SetCode(0) + raPayload := pkt.MessageBody() + ra := header.NDPRouterAdvert(raPayload) + // Populate the Router Lifetime. + binary.BigEndian.PutUint16(raPayload[routerLifetimeOffset:], rl) + // Populate the Managed Address flag field. + if managedAddress { + // The Managed Addresses flag field is the 7th bit of the flags byte. + raPayload[flagsByte] |= 1 << 7 + } + // Populate the Other Configurations flag field. + if otherConfigurations { + // The Other Configurations flag field is the 6th bit of the flags byte. + raPayload[flagsByte] |= 1 << 6 + } + // The Prf field is held in the flags byte. + raPayload[flagsByte] |= byte(prf) << 3 + opts := ra.Options() + opts.Serialize(optSer) + pkt.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{ + Header: pkt, + Src: ip, + Dst: header.IPv6AllNodesMulticastAddress, + })) + payloadLength := hdr.UsedLength() + iph := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + iph.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + TransportProtocol: icmp.ProtocolNumber6, + HopLimit: header.NDPHopLimit, + SrcAddr: ip, + DstAddr: header.IPv6AllNodesMulticastAddress, + }) + + eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) + eth.Encode(&header.EthernetFields{ + Type: ipv6.ProtocolNumber, + SrcAddr: src, + DstAddr: dst, + }) + return stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: hdr.View().ToVectorisedView(), + }) +} + +// raBufWithOpts returns a valid NDP Router Advertisement with options. +// +// Note, raBufWithOpts does not populate any of the RA fields other than the +// Router Lifetime. +func raBufWithOpts(src, dst tcpip.LinkAddress, ip tcpip.Address, rl uint16, optSer header.NDPOptionsSerializer) *stack.PacketBuffer { + return raBuf(src, dst, ip, rl, false /* managedAddress */, false /* otherConfigurations */, 0 /* prf */, optSer) +} + +// raBuf returns a valid NDP Router Advertisement. +// +// Note, raBuf does not populate any of the RA fields other than the +// Router Lifetime. +func raBufSimple(src, dst tcpip.LinkAddress, ip tcpip.Address, rl uint16) *stack.PacketBuffer { + return raBufWithOpts(src, dst, ip, rl, header.NDPOptionsSerializer{}) +} diff --git a/pkg/tap/switch.go b/pkg/tap/switch.go index 0a7a3343b..1f7a761e8 100644 --- a/pkg/tap/switch.go +++ b/pkg/tap/switch.go @@ -15,6 +15,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -217,6 +218,19 @@ loop: e.cam[eth.SourceAddress()] = id e.camLock.Unlock() + if eth.Type() == ipv6.ProtocolNumber { + networkLayer := header.IPv6(buf[header.EthernetMinimumSize:]) + if networkLayer.TransportProtocol() == header.ICMPv6ProtocolNumber { + transportLayer := header.ICMPv6(networkLayer.Payload()) + if transportLayer.Type() == header.ICMPv6RouterSolicit { + routerAdvertisement := raBufSimple(e.gateway.LinkAddress(), eth.SourceAddress(), tcpip.Address(net.ParseIP("fe80::1")), 1000) + if err := e.tx(e.gateway.LinkAddress(), eth.SourceAddress(), routerAdvertisement); err != nil { + log.Error(err) + } + } + } + } + if eth.DestinationAddress() != e.gateway.LinkAddress() { if err := e.tx(eth.SourceAddress(), eth.DestinationAddress(), stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: vv,