quinn-udp-0.4.1/.cargo_vcs_info.json0000644000000001470000000000100127600ustar { "git": { "sha1": "f0b4a31ef4a4758ef2ec75d56c79804bb2370b96" }, "path_in_vcs": "quinn-udp" }quinn-udp-0.4.1/Cargo.toml0000644000000023130000000000100107530ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.63" name = "quinn-udp" version = "0.4.1" description = "UDP sockets with ECN information for the QUIC transport protocol" keywords = ["quic"] categories = [ "network-programming", "asynchronous", ] license = "MIT OR Apache-2.0" repository = "https://github.com/quinn-rs/quinn" resolver = "1" [package.metadata.docs.rs] all-features = true [dependencies.bytes] version = "1" [dependencies.libc] version = "0.2.113" [dependencies.socket2] version = "0.5" [dependencies.tracing] version = "0.1.10" [features] default = ["log"] log = ["tracing/log"] [target."cfg(windows)".dependencies.windows-sys] version = "0.48.0" features = ["Win32_Networking_WinSock"] [badges.maintenance] status = "experimental" quinn-udp-0.4.1/Cargo.toml.orig000064400000000000000000000013611046102023000144360ustar 00000000000000[package] name = "quinn-udp" version = "0.4.1" edition = "2021" rust-version = "1.63" license = "MIT OR Apache-2.0" repository = "https://github.com/quinn-rs/quinn" description = "UDP sockets with ECN information for the QUIC transport protocol" keywords = ["quic"] categories = ["network-programming", "asynchronous"] workspace = ".." 
[package.metadata.docs.rs] all-features = true [features] default = ["log"] # Write logs via the `log` crate when no `tracing` subscriber exists log = ["tracing/log"] [badges] maintenance = { status = "experimental" } [dependencies] bytes = "1" libc = "0.2.113" socket2 = "0.5" tracing = "0.1.10" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Networking_WinSock"] } quinn-udp-0.4.1/src/cmsg.rs000064400000000000000000000075451046102023000136470ustar 00000000000000use std::{mem, ptr}; #[derive(Copy, Clone)] #[repr(align(8))] // Conservative bound for align_of pub(crate) struct Aligned(pub(crate) T); /// Helper to encode a series of control messages ("cmsgs") to a buffer for use in `sendmsg`. /// /// The operation must be "finished" for the msghdr to be usable, either by calling `finish` /// explicitly or by dropping the `Encoder`. pub(crate) struct Encoder<'a> { hdr: &'a mut libc::msghdr, cmsg: Option<&'a mut libc::cmsghdr>, len: usize, } impl<'a> Encoder<'a> { /// # Safety /// - `hdr.msg_control` must be a suitably aligned pointer to `hdr.msg_controllen` bytes that /// can be safely written /// - The `Encoder` must be dropped before `hdr` is passed to a system call, and must not be leaked. pub(crate) unsafe fn new(hdr: &'a mut libc::msghdr) -> Self { Self { cmsg: libc::CMSG_FIRSTHDR(hdr).as_mut(), hdr, len: 0, } } /// Append a control message to the buffer. /// /// # Panics /// - If insufficient buffer space remains. /// - If `T` has stricter alignment requirements than `cmsghdr` pub(crate) fn push(&mut self, level: libc::c_int, ty: libc::c_int, value: T) { assert!(mem::align_of::() <= mem::align_of::()); let space = unsafe { libc::CMSG_SPACE(mem::size_of_val(&value) as _) as usize }; #[allow(clippy::unnecessary_cast)] // hdr.msg_controllen defined as size_t { assert!( self.hdr.msg_controllen as usize >= self.len + space, "control message buffer too small. 
Required: {}, Available: {}", self.len + space, self.hdr.msg_controllen ); } let cmsg = self.cmsg.take().expect("no control buffer space remaining"); cmsg.cmsg_level = level; cmsg.cmsg_type = ty; cmsg.cmsg_len = unsafe { libc::CMSG_LEN(mem::size_of_val(&value) as _) } as _; unsafe { ptr::write(libc::CMSG_DATA(cmsg) as *const T as *mut T, value); } self.len += space; self.cmsg = unsafe { libc::CMSG_NXTHDR(self.hdr, cmsg).as_mut() }; } /// Finishes appending control messages to the buffer pub(crate) fn finish(self) { // Delegates to the `Drop` impl } } // Statically guarantees that the encoding operation is "finished" before the control buffer is read // by `sendmsg`. impl<'a> Drop for Encoder<'a> { fn drop(&mut self) { self.hdr.msg_controllen = self.len as _; } } /// # Safety /// /// `cmsg` must refer to a cmsg containing a payload of type `T` pub(crate) unsafe fn decode(cmsg: &libc::cmsghdr) -> T { assert!(mem::align_of::() <= mem::align_of::()); #[allow(clippy::unnecessary_cast)] // cmsg.cmsg_len defined as size_t { debug_assert_eq!( cmsg.cmsg_len as usize, libc::CMSG_LEN(mem::size_of::() as _) as usize ); } ptr::read(libc::CMSG_DATA(cmsg) as *const T) } pub(crate) struct Iter<'a> { hdr: &'a libc::msghdr, cmsg: Option<&'a libc::cmsghdr>, } impl<'a> Iter<'a> { /// # Safety /// /// `hdr.msg_control` must point to memory outliving `'a` which can be soundly read for the /// lifetime of the constructed `Iter` and contains a buffer of cmsgs, i.e. is aligned for /// `cmsghdr`, is fully initialized, and has correct internal links. 
pub(crate) unsafe fn new(hdr: &'a libc::msghdr) -> Self { Self { hdr, cmsg: libc::CMSG_FIRSTHDR(hdr).as_ref(), } } } impl<'a> Iterator for Iter<'a> { type Item = &'a libc::cmsghdr; fn next(&mut self) -> Option<&'a libc::cmsghdr> { let current = self.cmsg.take()?; self.cmsg = unsafe { libc::CMSG_NXTHDR(self.hdr, current).as_ref() }; Some(current) } } quinn-udp-0.4.1/src/fallback.rs000064400000000000000000000070761046102023000144540ustar 00000000000000use std::{ io::{self, IoSliceMut}, sync::Mutex, time::Instant, }; use proto::Transmit; use super::{log_sendmsg_error, RecvMeta, UdpSockRef, UdpState, IO_ERROR_LOG_INTERVAL}; /// Fallback UDP socket interface that stubs out all special functionality /// /// Used when a better implementation is not available for a particular target, at the cost of /// reduced performance compared to that enabled by some target-specific interfaces. #[derive(Debug)] pub struct UdpSocketState { last_send_error: Mutex, } impl UdpSocketState { pub fn new() -> Self { let now = Instant::now(); Self { last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)), } } pub fn configure(socket: UdpSockRef<'_>) -> io::Result<()> { socket.0.set_nonblocking(true) } pub fn send( &self, socket: UdpSockRef<'_>, _state: &UdpState, transmits: &[Transmit], ) -> Result { let mut sent = 0; for transmit in transmits { match socket.0.send_to( &transmit.contents, &socket2::SockAddr::from(transmit.destination), ) { Ok(_) => { sent += 1; } // We need to report that some packets were sent in this case, so we rely on // errors being either harmlessly transient (in the case of WouldBlock) or // recurring on the next call. Err(_) if sent != 0 => return Ok(sent), Err(e) => { if e.kind() == io::ErrorKind::WouldBlock { return Err(e); } // Other errors are ignored, since they will ususally be handled // by higher level retransmits and timeouts. // - PermissionDenied errors have been observed due to iptable rules. 
// Those are not fatal errors, since the // configuration can be dynamically changed. // - Destination unreachable errors have been observed for other log_sendmsg_error(&self.last_send_error, e, transmit); sent += 1; } } } Ok(sent) } pub fn recv( &self, socket: UdpSockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta], ) -> io::Result { // Safety: both `IoSliceMut` and `MaybeUninitSlice` promise to have the // same layout, that of `iovec`/`WSABUF`. Furthermore `recv_vectored` // promises to not write unitialised bytes to the `bufs` and pass it // directly to the `recvmsg` system call, so this is safe. let bufs = unsafe { &mut *(bufs as *mut [IoSliceMut<'_>] as *mut [socket2::MaybeUninitSlice<'_>]) }; let (len, _flags, addr) = socket.0.recv_from_vectored(bufs)?; meta[0] = RecvMeta { len, stride: len, addr: addr.as_socket().unwrap(), ecn: None, dst_ip: None, }; Ok(1) } } impl Default for UdpSocketState { fn default() -> Self { Self::new() } } /// Returns the platforms UDP socket capabilities pub(crate) fn udp_state() -> super::UdpState { super::UdpState { max_gso_segments: std::sync::atomic::AtomicUsize::new(1), gro_segments: 1, } } #[inline] pub(crate) fn may_fragment() -> bool { true } pub(crate) const BATCH_SIZE: usize = 1; quinn-udp-0.4.1/src/lib.rs000064400000000000000000000140171046102023000134540ustar 00000000000000//! Uniform interface to send/recv UDP packets with ECN information. 
#![warn(unreachable_pub)] #![warn(clippy::use_self)] #[cfg(unix)] use std::os::unix::io::AsFd; #[cfg(windows)] use std::os::windows::io::AsSocket; #[cfg(not(windows))] use std::sync::atomic::AtomicBool; use std::{ net::{IpAddr, Ipv6Addr, SocketAddr}, sync::{ atomic::{AtomicUsize, Ordering}, Mutex, }, time::{Duration, Instant}, }; use bytes::Bytes; use tracing::warn; #[cfg(unix)] mod cmsg; #[cfg(unix)] #[path = "unix.rs"] mod imp; #[cfg(windows)] #[path = "windows.rs"] mod imp; // No ECN support #[cfg(not(any(unix, windows)))] #[path = "fallback.rs"] mod imp; pub use imp::UdpSocketState; /// Whether transmitted datagrams might get fragmented by the IP layer /// /// Returns `false` on targets which employ e.g. the `IPV6_DONTFRAG` socket option. #[inline] pub fn may_fragment() -> bool { imp::may_fragment() } /// Number of UDP packets to send/receive at a time pub const BATCH_SIZE: usize = imp::BATCH_SIZE; /// The capabilities a UDP socket suppports on a certain platform #[derive(Debug)] pub struct UdpState { max_gso_segments: AtomicUsize, gro_segments: usize, /// True if we have received EINVAL error from `sendmsg` or `sendmmsg` system call at least once. /// /// If enabled, we assume that old kernel is used and switch to fallback mode. /// In particular, we do not use IP_TOS cmsg_type in this case, /// which is not supported on Linux <3.13 and results in not sending the UDP packet at all. #[cfg(not(windows))] sendmsg_einval: AtomicBool, } impl UdpState { pub fn new() -> Self { imp::udp_state() } /// The maximum amount of segments which can be transmitted if a platform /// supports Generic Send Offload (GSO). /// /// This is 1 if the platform doesn't support GSO. Subject to change if errors are detected /// while using GSO. #[inline] pub fn max_gso_segments(&self) -> usize { self.max_gso_segments.load(Ordering::Relaxed) } /// The number of segments to read when GRO is enabled. Used as a factor to /// compute the receive buffer size. 
/// /// Returns 1 if the platform doesn't support GRO. #[inline] pub fn gro_segments(&self) -> usize { self.gro_segments } /// Returns true if we previously got an EINVAL error from `sendmsg` or `sendmmsg` syscall. #[inline] #[cfg(not(windows))] fn sendmsg_einval(&self) -> bool { self.sendmsg_einval.load(Ordering::Relaxed) } /// Sets the flag indicating we got EINVAL error from `sendmsg` or `sendmmsg` syscall. #[inline] #[cfg(all(unix, not(any(target_os = "macos", target_os = "ios"))))] fn set_sendmsg_einval(&self) { self.sendmsg_einval.store(true, Ordering::Relaxed) } } impl Default for UdpState { fn default() -> Self { Self::new() } } #[derive(Debug, Copy, Clone)] pub struct RecvMeta { pub addr: SocketAddr, pub len: usize, pub stride: usize, pub ecn: Option, /// The destination IP address which was encoded in this datagram pub dst_ip: Option, } impl Default for RecvMeta { /// Constructs a value with arbitrary fields, intended to be overwritten fn default() -> Self { Self { addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0), len: 0, stride: 0, ecn: None, dst_ip: None, } } } /// An outgoing packet #[derive(Debug, Clone)] pub struct Transmit { /// The socket this datagram should be sent to pub destination: SocketAddr, /// Explicit congestion notification bits to set on the packet pub ecn: Option, /// Contents of the datagram pub contents: Bytes, /// The segment size if this transmission contains multiple datagrams. /// This is `None` if the transmit only contains a single datagram pub segment_size: Option, /// Optional source IP address for the datagram pub src_ip: Option, } /// Log at most 1 IO error per minute const IO_ERROR_LOG_INTERVAL: Duration = std::time::Duration::from_secs(60); /// Logs a warning message when sendmsg fails /// /// Logging will only be performed if at least [`IO_ERROR_LOG_INTERVAL`] /// has elapsed since the last error was logged. 
fn log_sendmsg_error( last_send_error: &Mutex, err: impl core::fmt::Debug, transmit: &Transmit, ) { let now = Instant::now(); let last_send_error = &mut *last_send_error.lock().expect("poisend lock"); if now.saturating_duration_since(*last_send_error) > IO_ERROR_LOG_INTERVAL { *last_send_error = now; warn!( "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, enc: {:?}, len: {:?}, segment_size: {:?} }}", err, transmit.destination, transmit.src_ip, transmit.ecn, transmit.contents.len(), transmit.segment_size); } } /// A borrowed UDP socket /// /// On Unix, constructible via `From`. On Windows, constructible via `From`. // Wrapper around socket2 to avoid making it a public dependency and incurring stability risk pub struct UdpSockRef<'a>(socket2::SockRef<'a>); #[cfg(unix)] impl<'s, S> From<&'s S> for UdpSockRef<'s> where S: AsFd, { fn from(socket: &'s S) -> Self { Self(socket.into()) } } #[cfg(windows)] impl<'s, S> From<&'s S> for UdpSockRef<'s> where S: AsSocket, { fn from(socket: &'s S) -> Self { Self(socket.into()) } } /// Explicit congestion notification codepoint #[repr(u8)] #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum EcnCodepoint { #[doc(hidden)] Ect0 = 0b10, #[doc(hidden)] Ect1 = 0b01, #[doc(hidden)] Ce = 0b11, } impl EcnCodepoint { /// Create new object from the given bits pub fn from_bits(x: u8) -> Option { use self::EcnCodepoint::*; Some(match x & 0b11 { 0b10 => Ect0, 0b01 => Ect1, 0b11 => Ce, _ => { return None; } }) } } quinn-udp-0.4.1/src/unix.rs000064400000000000000000000657671046102023000137130ustar 00000000000000#[cfg(not(any(target_os = "macos", target_os = "ios")))] use std::ptr; use std::{ io, io::IoSliceMut, mem::{self, MaybeUninit}, net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, os::unix::io::AsRawFd, sync::{ atomic::{AtomicBool, AtomicUsize}, Mutex, }, time::Instant, }; use socket2::SockRef; use super::{ cmsg, log_sendmsg_error, EcnCodepoint, RecvMeta, Transmit, UdpSockRef, UdpState, 
IO_ERROR_LOG_INTERVAL, }; #[cfg(target_os = "freebsd")] type IpTosTy = libc::c_uchar; #[cfg(not(target_os = "freebsd"))] type IpTosTy = libc::c_int; /// Tokio-compatible UDP socket with some useful specializations. /// /// Unlike a standard tokio UDP socket, this allows ECN bits to be read and written on some /// platforms. #[derive(Debug)] pub struct UdpSocketState { last_send_error: Mutex, } impl UdpSocketState { pub fn new() -> Self { let now = Instant::now(); Self { last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)), } } pub fn configure(sock: UdpSockRef<'_>) -> io::Result<()> { init(sock.0) } pub fn send( &self, socket: UdpSockRef<'_>, state: &UdpState, transmits: &[Transmit], ) -> Result { send(state, socket.0, &self.last_send_error, transmits) } pub fn recv( &self, socket: UdpSockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta], ) -> io::Result { recv(socket.0, bufs, meta) } } impl Default for UdpSocketState { fn default() -> Self { Self::new() } } fn init(io: SockRef<'_>) -> io::Result<()> { let mut cmsg_platform_space = 0; if cfg!(target_os = "linux") || cfg!(target_os = "freebsd") || cfg!(target_os = "macos") { cmsg_platform_space += unsafe { libc::CMSG_SPACE(mem::size_of::() as _) as usize }; } assert!( CMSG_LEN >= unsafe { libc::CMSG_SPACE(mem::size_of::() as _) as usize } + cmsg_platform_space ); assert!( mem::align_of::() <= mem::align_of::>(), "control message buffers will be misaligned" ); io.set_nonblocking(true)?; let addr = io.local_addr()?; let is_ipv4 = addr.family() == libc::AF_INET as libc::sa_family_t; // mac and ios do not support IP_RECVTOS on dual-stack sockets :( // older macos versions also don't have the flag and will error out if we don't ignore it if is_ipv4 || !io.only_v6()? 
{ if let Err(err) = set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_RECVTOS, OPTION_ON) { tracing::debug!("Ignoring error setting IP_RECVTOS on socket: {err:?}",); } } #[cfg(target_os = "linux")] { // opportunistically try to enable GRO. See gro::gro_segments(). let _ = set_socket_option(&*io, libc::SOL_UDP, libc::UDP_GRO, OPTION_ON); // Forbid IPv4 fragmentation. Set even for IPv6 to account for IPv6 mapped IPv4 addresses. set_socket_option( &*io, libc::IPPROTO_IP, libc::IP_MTU_DISCOVER, libc::IP_PMTUDISC_PROBE, )?; if is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_PKTINFO, OPTION_ON)?; } else { set_socket_option( &*io, libc::IPPROTO_IPV6, libc::IPV6_MTU_DISCOVER, libc::IP_PMTUDISC_PROBE, )?; } } #[cfg(any(target_os = "freebsd", target_os = "macos", target_os = "ios"))] { if is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_DONTFRAG, OPTION_ON)?; } } #[cfg(any(target_os = "freebsd", target_os = "macos"))] // IP_RECVDSTADDR == IP_SENDSRCADDR on FreeBSD // macOS uses only IP_RECVDSTADDR, no IP_SENDSRCADDR on macOS // macOS also supports IP_PKTINFO { if is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_RECVDSTADDR, OPTION_ON)?; } } // Options standardized in RFC 3542 if !is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IPV6, libc::IPV6_RECVPKTINFO, OPTION_ON)?; set_socket_option(&*io, libc::IPPROTO_IPV6, libc::IPV6_RECVTCLASS, OPTION_ON)?; // Linux's IP_PMTUDISC_PROBE allows us to operate under interface MTU rather than the // kernel's path MTU guess, but actually disabling fragmentation requires this too. See // __ip6_append_data in ip6_output.c. 
set_socket_option(&*io, libc::IPPROTO_IPV6, libc::IPV6_DONTFRAG, OPTION_ON)?; } Ok(()) } #[cfg(not(any(target_os = "macos", target_os = "ios")))] fn send( #[allow(unused_variables)] // only used on Linux state: &UdpState, io: SockRef<'_>, last_send_error: &Mutex, transmits: &[Transmit], ) -> io::Result { #[allow(unused_mut)] // only mutable on FreeBSD let mut encode_src_ip = true; #[cfg(target_os = "freebsd")] { let addr = io.local_addr()?; let is_ipv4 = addr.family() == libc::AF_INET as libc::sa_family_t; if is_ipv4 { if let Some(socket) = addr.as_socket_ipv4() { encode_src_ip = socket.ip() == &Ipv4Addr::UNSPECIFIED; } } } let mut msgs: [libc::mmsghdr; BATCH_SIZE] = unsafe { mem::zeroed() }; let mut iovecs: [libc::iovec; BATCH_SIZE] = unsafe { mem::zeroed() }; let mut cmsgs = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE]; // This assume_init looks a bit weird because one might think it // assumes the SockAddr data to be initialized, but that call // refers to the whole array, which itself is made up of MaybeUninit // containers. Their presence protects the SockAddr inside from // being assumed as initialized by the assume_init call. 
// TODO: Replace this with uninit_array once it becomes MSRV-stable let mut addrs: [MaybeUninit; BATCH_SIZE] = unsafe { MaybeUninit::uninit().assume_init() }; for (i, transmit) in transmits.iter().enumerate().take(BATCH_SIZE) { let dst_addr = unsafe { ptr::write( addrs[i].as_mut_ptr(), socket2::SockAddr::from(transmit.destination), ); &*addrs[i].as_ptr() }; prepare_msg( transmit, dst_addr, &mut msgs[i].msg_hdr, &mut iovecs[i], &mut cmsgs[i], encode_src_ip, state.sendmsg_einval(), ); } let num_transmits = transmits.len().min(BATCH_SIZE); loop { let n = unsafe { sendmmsg_with_fallback(io.as_raw_fd(), msgs.as_mut_ptr(), num_transmits as _) }; if n == -1 { let e = io::Error::last_os_error(); match e.kind() { io::ErrorKind::Interrupted => { // Retry the transmission continue; } io::ErrorKind::WouldBlock => return Err(e), _ => { // Some network adapters and drivers do not support GSO. Unfortunately, Linux // offers no easy way for us to detect this short of an EIO or sometimes EINVAL // when we try to actually send datagrams using it. #[cfg(target_os = "linux")] if let Some(libc::EIO) | Some(libc::EINVAL) = e.raw_os_error() { // Prevent new transmits from being scheduled using GSO. Existing GSO transmits // may already be in the pipeline, so we need to tolerate additional failures. if state.max_gso_segments() > 1 { tracing::error!("got transmit error, halting segmentation offload"); state .max_gso_segments .store(1, std::sync::atomic::Ordering::Relaxed); } } if e.raw_os_error() == Some(libc::EINVAL) { // Some arguments to `sendmsg` are not supported. // Switch to fallback mode. state.set_sendmsg_einval(); } // Other errors are ignored, since they will ususally be handled // by higher level retransmits and timeouts. // - PermissionDenied errors have been observed due to iptable rules. // Those are not fatal errors, since the // configuration can be dynamically changed. 
// - Destination unreachable errors have been observed for other log_sendmsg_error(last_send_error, e, &transmits[0]); // The ERRORS section in https://man7.org/linux/man-pages/man2/sendmmsg.2.html // describes that errors will only be returned if no message could be transmitted // at all. Therefore drop the first (problematic) message, // and retry the remaining ones. return Ok(num_transmits.min(1)); } } } return Ok(n as usize); } } #[cfg(any(target_os = "macos", target_os = "ios"))] fn send( state: &UdpState, io: SockRef<'_>, last_send_error: &Mutex, transmits: &[Transmit], ) -> io::Result { let mut hdr: libc::msghdr = unsafe { mem::zeroed() }; let mut iov: libc::iovec = unsafe { mem::zeroed() }; let mut ctrl = cmsg::Aligned([0u8; CMSG_LEN]); let mut sent = 0; while sent < transmits.len() { let addr = socket2::SockAddr::from(transmits[sent].destination); prepare_msg( &transmits[sent], &addr, &mut hdr, &mut iov, &mut ctrl, // Only tested on macOS cfg!(target_os = "macos"), state.sendmsg_einval(), ); let n = unsafe { libc::sendmsg(io.as_raw_fd(), &hdr, 0) }; if n == -1 { let e = io::Error::last_os_error(); match e.kind() { io::ErrorKind::Interrupted => { // Retry the transmission } io::ErrorKind::WouldBlock if sent != 0 => return Ok(sent), io::ErrorKind::WouldBlock => return Err(e), _ => { // Other errors are ignored, since they will ususally be handled // by higher level retransmits and timeouts. // - PermissionDenied errors have been observed due to iptable rules. // Those are not fatal errors, since the // configuration can be dynamically changed. // - Destination unreachable errors have been observed for other log_sendmsg_error(last_send_error, e, &transmits[sent]); sent += 1; } } } else { sent += 1; } } Ok(sent) } /// Implementation of `sendmmsg` with a fallback /// to `sendmsg` if syscall is not available. /// /// It uses [`libc::syscall`] instead of [`libc::sendmmsg`] /// to avoid linking error on systems where libc does not contain `sendmmsg`. 
#[cfg(not(any(target_os = "macos", target_os = "ios")))] unsafe fn sendmmsg_with_fallback( sockfd: libc::c_int, msgvec: *mut libc::mmsghdr, vlen: libc::c_uint, ) -> libc::c_int { let flags = 0; #[cfg(not(target_os = "freebsd"))] { let ret = libc::syscall(libc::SYS_sendmmsg, sockfd, msgvec, vlen, flags) as libc::c_int; if ret != -1 { return ret; } } // libc on FreeBSD implements `sendmmsg` as a high-level abstraction over `sendmsg`, // thus `SYS_sendmmsg` constant and direct system call do not exist #[cfg(target_os = "freebsd")] { let ret = libc::sendmmsg(sockfd, msgvec, vlen as usize, flags) as libc::c_int; if ret != -1 { return ret; } } let e = io::Error::last_os_error(); match e.raw_os_error() { Some(libc::ENOSYS) => { // Fallback to `sendmsg`. sendmmsg_fallback(sockfd, msgvec, vlen) } _ => -1, } } /// Fallback implementation of `sendmmsg` using `sendmsg` /// for systems which do not support `sendmmsg` /// such as Linux <3.0. #[cfg(not(any(target_os = "macos", target_os = "ios")))] unsafe fn sendmmsg_fallback( sockfd: libc::c_int, msgvec: *mut libc::mmsghdr, vlen: libc::c_uint, ) -> libc::c_int { let flags = 0; if vlen == 0 { return 0; } let n = libc::sendmsg(sockfd, &(*msgvec).msg_hdr, flags); if n == -1 { -1 } else { // type of `msg_len` field differs on Linux and FreeBSD, // it is up to the compiler to infer and cast `n` to correct type (*msgvec).msg_len = n as _; 1 } } #[cfg(not(any(target_os = "macos", target_os = "ios")))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut names = [MaybeUninit::::uninit(); BATCH_SIZE]; let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE]; let mut hdrs = unsafe { mem::zeroed::<[libc::mmsghdr; BATCH_SIZE]>() }; let max_msg_count = bufs.len().min(BATCH_SIZE); for i in 0..max_msg_count { prepare_recv( &mut bufs[i], &mut names[i], &mut ctrls[i], &mut hdrs[i].msg_hdr, ); } let msg_count = loop { let n = unsafe { recvmmsg_with_fallback( 
io.as_raw_fd(), hdrs.as_mut_ptr(), bufs.len().min(BATCH_SIZE) as _, ) }; if n == -1 { let e = io::Error::last_os_error(); if e.kind() == io::ErrorKind::Interrupted { continue; } return Err(e); } break n; }; for i in 0..(msg_count as usize) { meta[i] = decode_recv(&names[i], &hdrs[i].msg_hdr, hdrs[i].msg_len as usize); } Ok(msg_count as usize) } #[cfg(any(target_os = "macos", target_os = "ios"))] fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result { let mut name = MaybeUninit::::uninit(); let mut ctrl = cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); let mut hdr = unsafe { mem::zeroed::() }; prepare_recv(&mut bufs[0], &mut name, &mut ctrl, &mut hdr); let n = loop { let n = unsafe { libc::recvmsg(io.as_raw_fd(), &mut hdr, 0) }; if n == -1 { let e = io::Error::last_os_error(); if e.kind() == io::ErrorKind::Interrupted { continue; } return Err(e); } if hdr.msg_flags & libc::MSG_TRUNC != 0 { continue; } break n; }; meta[0] = decode_recv(&name, &hdr, n as usize); Ok(1) } /// Implementation of `recvmmsg` with a fallback /// to `recvmsg` if syscall is not available. /// /// It uses [`libc::syscall`] instead of [`libc::recvmmsg`] /// to avoid linking error on systems where libc does not contain `recvmmsg`. 
#[cfg(not(any(target_os = "macos", target_os = "ios")))] unsafe fn recvmmsg_with_fallback( sockfd: libc::c_int, msgvec: *mut libc::mmsghdr, vlen: libc::c_uint, ) -> libc::c_int { let flags = 0; let timeout = ptr::null_mut::(); #[cfg(not(target_os = "freebsd"))] { let ret = libc::syscall(libc::SYS_recvmmsg, sockfd, msgvec, vlen, flags, timeout) as libc::c_int; if ret != -1 { return ret; } } // libc on FreeBSD implements `recvmmsg` as a high-level abstraction over `recvmsg`, // thus `SYS_recvmmsg` constant and direct system call do not exist #[cfg(target_os = "freebsd")] { let ret = libc::recvmmsg(sockfd, msgvec, vlen as usize, flags, timeout) as libc::c_int; if ret != -1 { return ret; } } let e = io::Error::last_os_error(); match e.raw_os_error() { Some(libc::ENOSYS) => { // Fallback to `recvmsg`. recvmmsg_fallback(sockfd, msgvec, vlen) } _ => -1, } } /// Fallback implementation of `recvmmsg` using `recvmsg` /// for systems which do not support `recvmmsg` /// such as Linux <2.6.33. #[cfg(not(any(target_os = "macos", target_os = "ios")))] unsafe fn recvmmsg_fallback( sockfd: libc::c_int, msgvec: *mut libc::mmsghdr, vlen: libc::c_uint, ) -> libc::c_int { let flags = 0; if vlen == 0 { return 0; } let n = libc::recvmsg(sockfd, &mut (*msgvec).msg_hdr, flags); if n == -1 { -1 } else { // type of `msg_len` field differs on Linux and FreeBSD, // it is up to the compiler to infer and cast `n` to correct type (*msgvec).msg_len = n as _; 1 } } /// Returns the platforms UDP socket capabilities pub(crate) fn udp_state() -> UdpState { UdpState { max_gso_segments: AtomicUsize::new(gso::max_gso_segments()), gro_segments: gro::gro_segments(), sendmsg_einval: AtomicBool::new(false), } } const CMSG_LEN: usize = 88; fn prepare_msg( transmit: &Transmit, dst_addr: &socket2::SockAddr, hdr: &mut libc::msghdr, iov: &mut libc::iovec, ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>, #[allow(unused_variables)] // only used on FreeBSD & macOS encode_src_ip: bool, sendmsg_einval: bool, ) { iov.iov_base 
= transmit.contents.as_ptr() as *const _ as *mut _;
    iov.iov_len = transmit.contents.len();

    // SAFETY: Casting the pointer to a mutable one is legal,
    // as sendmsg is guaranteed to not alter the mutable pointer
    // as per the POSIX spec. See the section on the sys/socket.h
    // header for details. The type is only mutable in the first
    // place because it is reused by recvmsg as well.
    let name = dst_addr.as_ptr() as *mut libc::c_void;
    let namelen = dst_addr.len();
    hdr.msg_name = name as *mut _;
    hdr.msg_namelen = namelen;
    hdr.msg_iov = iov;
    hdr.msg_iovlen = 1;

    hdr.msg_control = ctrl.0.as_mut_ptr() as _;
    hdr.msg_controllen = CMSG_LEN as _;
    let mut encoder = unsafe { cmsg::Encoder::new(hdr) };
    let ecn = transmit.ecn.map_or(0, |x| x as libc::c_int);
    if transmit.destination.is_ipv4() {
        if !sendmsg_einval {
            encoder.push(libc::IPPROTO_IP, libc::IP_TOS, ecn as IpTosTy);
        }
    } else {
        encoder.push(libc::IPPROTO_IPV6, libc::IPV6_TCLASS, ecn);
    }

    if let Some(segment_size) = transmit.segment_size {
        gso::set_segment_size(&mut encoder, segment_size as u16);
    }

    if let Some(ip) = &transmit.src_ip {
        match ip {
            IpAddr::V4(v4) => {
                #[cfg(target_os = "linux")]
                {
                    let pktinfo = libc::in_pktinfo {
                        ipi_ifindex: 0,
                        ipi_spec_dst: libc::in_addr {
                            s_addr: u32::from_ne_bytes(v4.octets()),
                        },
                        ipi_addr: libc::in_addr { s_addr: 0 },
                    };
                    encoder.push(libc::IPPROTO_IP, libc::IP_PKTINFO, pktinfo);
                }
                #[cfg(any(target_os = "freebsd", target_os = "macos"))]
                {
                    if encode_src_ip {
                        let addr = libc::in_addr {
                            s_addr: u32::from_ne_bytes(v4.octets()),
                        };
                        encoder.push(libc::IPPROTO_IP, libc::IP_RECVDSTADDR, addr);
                    }
                }
            }
            IpAddr::V6(v6) => {
                let pktinfo = libc::in6_pktinfo {
                    ipi6_ifindex: 0,
                    ipi6_addr: libc::in6_addr {
                        s6_addr: v6.octets(),
                    },
                };
                encoder.push(libc::IPPROTO_IPV6, libc::IPV6_PKTINFO, pktinfo);
            }
        }
    }

    encoder.finish();
}

/// Points `hdr` at the buffers a receive call should fill.
///
/// `name` receives the peer address, `buf` the datagram payload and `ctrl` the
/// control messages; `msg_flags` is reset to zero.
//
// NOTE(review): the generic arguments on `MaybeUninit`, `cmsg::Aligned` and
// `mem::size_of` were missing from the extracted text and are reconstructed from
// how the values are used (`ss_family`, `CMSG_LEN`) — confirm against upstream.
fn prepare_recv(
    buf: &mut IoSliceMut,
    name: &mut MaybeUninit<libc::sockaddr_storage>,
    ctrl: &mut cmsg::Aligned<MaybeUninit<[u8; CMSG_LEN]>>,
    hdr: &mut libc::msghdr,
) {
    hdr.msg_name = name.as_mut_ptr() as _;
    hdr.msg_namelen = mem::size_of::<libc::sockaddr_storage>() as _;
    hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec;
    hdr.msg_iovlen = 1;
    hdr.msg_control = ctrl.0.as_mut_ptr() as _;
    hdr.msg_controllen = CMSG_LEN as _;
    hdr.msg_flags = 0;
}

/// Builds the [`RecvMeta`] for one received datagram of `len` bytes.
///
/// Walks the control messages attached to `hdr` to collect the ECN bits, the
/// local destination address and (on Linux) the GRO segment stride, then converts
/// the peer address stored in `name` into a `SocketAddr`.
///
/// `name` is read with `assume_init`, so it must already hold an initialized
/// address.
///
/// # Panics
/// If the address family in `name` is neither `AF_INET` nor `AF_INET6`.
//
// NOTE(review): the type arguments of `cmsg::decode` and `mem::size_of` were
// missing from the extracted text; they are reconstructed from the fields read
// from each decoded value and from the surrounding casts — confirm against upstream.
fn decode_recv(
    name: &MaybeUninit<libc::sockaddr_storage>,
    hdr: &libc::msghdr,
    len: usize,
) -> RecvMeta {
    let name = unsafe { name.assume_init() };
    let mut ecn_bits = 0;
    let mut dst_ip = None;
    #[allow(unused_mut)] // only mutable on Linux
    let mut stride = len;

    let cmsg_iter = unsafe { cmsg::Iter::new(hdr) };
    for cmsg in cmsg_iter {
        match (cmsg.cmsg_level, cmsg.cmsg_type) {
            // FreeBSD uses IP_RECVTOS here, and we can be liberal because cmsgs are opt-in.
            (libc::IPPROTO_IP, libc::IP_TOS) | (libc::IPPROTO_IP, libc::IP_RECVTOS) => unsafe {
                ecn_bits = cmsg::decode::<u8>(cmsg);
            },
            (libc::IPPROTO_IPV6, libc::IPV6_TCLASS) => unsafe {
                // Temporary hack around broken macos ABI. Remove once upstream fixes it.
                // https://bugreport.apple.com/web/?problemID=48761855
                #[allow(clippy::unnecessary_cast)] // cmsg.cmsg_len defined as size_t
                if cfg!(target_os = "macos")
                    && cmsg.cmsg_len as usize == libc::CMSG_LEN(mem::size_of::<u8>() as _) as usize
                {
                    ecn_bits = cmsg::decode::<u8>(cmsg);
                } else {
                    ecn_bits = cmsg::decode::<libc::c_int>(cmsg) as u8;
                }
            },
            #[cfg(target_os = "linux")]
            (libc::IPPROTO_IP, libc::IP_PKTINFO) => {
                let pktinfo = unsafe { cmsg::decode::<libc::in_pktinfo>(cmsg) };
                dst_ip = Some(IpAddr::V4(Ipv4Addr::from(
                    pktinfo.ipi_addr.s_addr.to_ne_bytes(),
                )));
            }
            #[cfg(any(target_os = "freebsd", target_os = "macos"))]
            (libc::IPPROTO_IP, libc::IP_RECVDSTADDR) => {
                let in_addr = unsafe { cmsg::decode::<libc::in_addr>(cmsg) };
                dst_ip = Some(IpAddr::V4(Ipv4Addr::from(in_addr.s_addr.to_ne_bytes())));
            }
            (libc::IPPROTO_IPV6, libc::IPV6_PKTINFO) => {
                let pktinfo = unsafe { cmsg::decode::<libc::in6_pktinfo>(cmsg) };
                dst_ip = Some(IpAddr::V6(Ipv6Addr::from(pktinfo.ipi6_addr.s6_addr)));
            }
            #[cfg(target_os = "linux")]
            (libc::SOL_UDP, libc::UDP_GRO) => unsafe {
                stride = cmsg::decode::<libc::c_int>(cmsg) as usize;
            },
            _ => {}
        }
    }

    let addr = match libc::c_int::from(name.ss_family) {
        libc::AF_INET => {
            // Safety: if the ss_family field is AF_INET then storage must be a sockaddr_in.
            let addr: &libc::sockaddr_in =
                unsafe { &*(&name as *const _ as *const libc::sockaddr_in) };
            SocketAddr::V4(SocketAddrV4::new(
                Ipv4Addr::from(addr.sin_addr.s_addr.to_ne_bytes()),
                u16::from_be(addr.sin_port),
            ))
        }
        libc::AF_INET6 => {
            // Safety: if the ss_family field is AF_INET6 then storage must be a sockaddr_in6.
            let addr: &libc::sockaddr_in6 =
                unsafe { &*(&name as *const _ as *const libc::sockaddr_in6) };
            SocketAddr::V6(SocketAddrV6::new(
                Ipv6Addr::from(addr.sin6_addr.s6_addr),
                u16::from_be(addr.sin6_port),
                addr.sin6_flowinfo,
                addr.sin6_scope_id,
            ))
        }
        _ => unreachable!(),
    };

    RecvMeta {
        len,
        stride,
        addr,
        ecn: EcnCodepoint::from_bits(ecn_bits),
        dst_ip,
    }
}

#[cfg(not(any(target_os = "macos", target_os = "ios")))]
// Chosen somewhat arbitrarily; might benefit from additional tuning.
pub(crate) const BATCH_SIZE: usize = 32;

#[cfg(any(target_os = "macos", target_os = "ios"))]
pub(crate) const BATCH_SIZE: usize = 1;

/// Always reports `false` on this platform.
#[inline]
pub(crate) fn may_fragment() -> bool {
    false
}

#[cfg(target_os = "linux")]
mod gso {
    use super::*;

    /// Checks whether GSO support is available by setting the UDP_SEGMENT
    /// option on a socket
    ///
    /// Returns 64 when the option can be set on a throwaway socket, and 1 when
    /// the socket cannot be bound or the option is rejected.
    pub(crate) fn max_gso_segments() -> usize {
        const GSO_SIZE: libc::c_int = 1500;

        let socket = match std::net::UdpSocket::bind("[::]:0")
            .or_else(|_| std::net::UdpSocket::bind("127.0.0.1:0"))
        {
            Ok(socket) => socket,
            Err(_) => return 1,
        };

        // As defined in linux/udp.h
        // #define UDP_MAX_SEGMENTS (1 << 6UL)
        match set_socket_option(&socket, libc::SOL_UDP, libc::UDP_SEGMENT, GSO_SIZE) {
            Ok(()) => 64,
            Err(_) => 1,
        }
    }

    /// Appends a `UDP_SEGMENT` control message carrying `segment_size`.
    pub(crate) fn set_segment_size(encoder: &mut cmsg::Encoder, segment_size: u16) {
        encoder.push(libc::SOL_UDP, libc::UDP_SEGMENT, segment_size);
    }
}

#[cfg(not(target_os = "linux"))]
mod gso {
    use super::*;

    /// Without Linux GSO, only a single segment per send is reported.
    pub(super) fn max_gso_segments() -> usize {
        1
    }

    /// Unsupported off Linux; always panics.
    pub(super) fn set_segment_size(_encoder: &mut cmsg::Encoder, _segment_size: u16) {
        panic!("Setting a segment size is not supported on current platform");
    }
}

#[cfg(target_os = "linux")]
mod gro {
    use super::*;

    /// Probes for GRO support by enabling `UDP_GRO` on a throwaway socket.
    ///
    /// Returns 64 when the option can be set, and 1 when the socket cannot be
    /// bound or the option is rejected.
    pub(crate) fn gro_segments() -> usize {
        let socket = match std::net::UdpSocket::bind("[::]:0")
            .or_else(|_| std::net::UdpSocket::bind("127.0.0.1:0"))
        {
            Ok(socket) => socket,
            Err(_) => return 1,
        };

        // As defined in net/ipv4/udp_offload.c
        // #define UDP_GRO_CNT_MAX 64
        //
        // NOTE: this MUST be set to UDP_GRO_CNT_MAX to ensure that the receive buffer size
        // (get_max_udp_payload_size() * gro_segments()) is large enough to hold the largest GRO
        // list the kernel might potentially produce. See
        // https://github.com/quinn-rs/quinn/pull/1354.
        match set_socket_option(&socket, libc::SOL_UDP, libc::UDP_GRO, OPTION_ON) {
            Ok(()) => 64,
            Err(_) => 1,
        }
    }
}

/// Sets an integer-valued socket option via `setsockopt`.
///
/// # Errors
/// Returns the last OS error when `setsockopt` reports failure.
fn set_socket_option(
    socket: &impl AsRawFd,
    level: libc::c_int,
    name: libc::c_int,
    value: libc::c_int,
) -> Result<(), io::Error> {
    let rc = unsafe {
        libc::setsockopt(
            socket.as_raw_fd(),
            level,
            name,
            &value as *const _ as _,
            mem::size_of_val(&value) as _,
        )
    };

    if rc == 0 {
        Ok(())
    } else {
        Err(io::Error::last_os_error())
    }
}

const OPTION_ON: libc::c_int = 1;

#[cfg(not(target_os = "linux"))]
mod gro {
    /// Without Linux GRO, each receive yields a single segment.
    pub(super) fn gro_segments() -> usize {
        1
    }
}
// NOTE(review): the next line is tar-archive header residue marking the start of
// `src/windows.rs`; it is not Rust and is kept verbatim only to preserve the dump.
quinn-udp-0.4.1/src/windows.rs000064400000000000000000000114351046102023000144010ustar 00000000000000
use std::{
    io::{self, IoSliceMut},
    mem,
    os::windows::io::AsRawSocket,
    sync::Mutex,
    time::Instant,
};

use windows_sys::Win32::Networking::WinSock;

use super::{log_sendmsg_error, RecvMeta, Transmit, UdpSockRef, UdpState, IO_ERROR_LOG_INTERVAL};

/// QUIC-friendly UDP interface for Windows
// NOTE(review): `Mutex<Instant>` is reconstructed from the initializer in `new`;
// the type argument was missing from the extracted text.
#[derive(Debug)]
pub struct UdpSocketState {
    last_send_error: Mutex<Instant>,
}

impl UdpSocketState {
    /// Creates the state with `last_send_error` backdated by twice
    /// `IO_ERROR_LOG_INTERVAL` (or set to "now" if that subtraction underflows).
    pub fn new() -> Self {
        let now = Instant::now();
        Self {
            last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)),
        }
    }

    pub fn configure(socket: UdpSockRef<'_>) -> io::Result<()> {
        socket.0.set_nonblocking(true)?;
        let addr = socket.0.local_addr()?;
        let is_ipv6 = addr.as_socket_ipv6().is_some();
        let
v6only = unsafe {
            let mut result: u32 = 0;
            let mut len = mem::size_of_val(&result) as i32;
            let rc = WinSock::getsockopt(
                socket.0.as_raw_socket() as _,
                WinSock::IPPROTO_IPV6,
                WinSock::IPV6_V6ONLY as _,
                &mut result as *mut _ as _,
                &mut len,
            );
            if rc == -1 {
                return Err(io::Error::last_os_error());
            }
            result != 0
        };
        // A socket that is not v6-only also carries IPv4 traffic.
        let is_ipv4 = addr.as_socket_ipv4().is_some() || !v6only;

        let sock_true: u32 = 1;

        if is_ipv4 {
            let rc = unsafe {
                WinSock::setsockopt(
                    socket.0.as_raw_socket() as _,
                    WinSock::IPPROTO_IP as _,
                    WinSock::IP_DONTFRAGMENT as _,
                    &sock_true as *const _ as _,
                    mem::size_of_val(&sock_true) as _,
                )
            };
            if rc == -1 {
                return Err(io::Error::last_os_error());
            }
        }

        if is_ipv6 {
            let rc = unsafe {
                WinSock::setsockopt(
                    socket.0.as_raw_socket() as _,
                    WinSock::IPPROTO_IPV6 as _,
                    WinSock::IPV6_DONTFRAG as _,
                    &sock_true as *const _ as _,
                    mem::size_of_val(&sock_true) as _,
                )
            };
            if rc == -1 {
                return Err(io::Error::last_os_error());
            }
        }

        Ok(())
    }

    /// Sends `transmits` one datagram at a time with `send_to`.
    ///
    /// Returns the number of transmits consumed. `_state` is unused here.
    ///
    /// # Errors
    /// Only a `WouldBlock` error on the very first transmit is returned. An error
    /// after at least one successful send ends the call with `Ok(sent)`; any
    /// other error is logged and the transmit is counted as sent.
    //
    // NOTE(review): the return type's generic arguments were missing from the
    // extracted text; `Result<usize, io::Error>` is reconstructed from the
    // `Ok(sent)` / `Err(e)` values below — confirm against upstream.
    pub fn send(
        &self,
        socket: UdpSockRef<'_>,
        _state: &UdpState,
        transmits: &[Transmit],
    ) -> Result<usize, io::Error> {
        let mut sent = 0;
        for transmit in transmits {
            match socket.0.send_to(
                &transmit.contents,
                &socket2::SockAddr::from(transmit.destination),
            ) {
                Ok(_) => {
                    sent += 1;
                }
                // We need to report that some packets were sent in this case, so we rely on
                // errors being either harmlessly transient (in the case of WouldBlock) or
                // recurring on the next call.
                Err(_) if sent != 0 => return Ok(sent),
                Err(e) => {
                    if e.kind() == io::ErrorKind::WouldBlock {
                        return Err(e);
                    }

                    // Other errors are ignored, since they will usually be handled
                    // by higher level retransmits and timeouts.
                    log_sendmsg_error(&self.last_send_error, e, transmit);
                    sent += 1;
                }
            }
        }
        Ok(sent)
    }

    /// Receives a single datagram into `bufs` and records it in `meta[0]`.
    ///
    /// Always returns `Ok(1)` on success; `ecn` and `dst_ip` are left as `None`
    /// and `stride` equals the received length.
    ///
    /// # Panics
    /// If `meta` is empty, or if the peer address is not an IP socket address.
    //
    // NOTE(review): the `io::Result` type argument was missing from the extracted
    // text; `usize` is reconstructed from the `Ok(1)` return — confirm against upstream.
    pub fn recv(
        &self,
        socket: UdpSockRef<'_>,
        bufs: &mut [IoSliceMut<'_>],
        meta: &mut [RecvMeta],
    ) -> io::Result<usize> {
        // Safety: both `IoSliceMut` and `MaybeUninitSlice` promise to have the
        // same layout, that of `iovec`/`WSABUF`. Furthermore `recv_vectored`
        // promises to not write uninitialised bytes to the `bufs` and pass it
        // directly to the `recvmsg` system call, so this is safe.
        let bufs = unsafe {
            &mut *(bufs as *mut [IoSliceMut<'_>] as *mut [socket2::MaybeUninitSlice<'_>])
        };
        let (len, _flags, addr) = socket.0.recv_from_vectored(bufs)?;
        meta[0] = RecvMeta {
            len,
            stride: len,
            addr: addr.as_socket().unwrap(),
            ecn: None,
            dst_ip: None,
        };
        Ok(1)
    }
}

impl Default for UdpSocketState {
    fn default() -> Self {
        Self::new()
    }
}

/// Returns the platform's UDP socket capabilities
///
/// Both the GSO and GRO segment counts are fixed at 1 here.
pub(crate) fn udp_state() -> super::UdpState {
    super::UdpState {
        max_gso_segments: std::sync::atomic::AtomicUsize::new(1),
        gro_segments: 1,
    }
}

pub(crate) const BATCH_SIZE: usize = 1;

/// Always reports `false` on this platform.
#[inline]
pub(crate) fn may_fragment() -> bool {
    false
}