quinn-udp-0.5.9/.cargo_vcs_info.json0000644000000001470000000000100127710ustar { "git": { "sha1": "6ee883a20cb02968ae627e2ca9396f570d815e86" }, "path_in_vcs": "quinn-udp" }quinn-udp-0.5.9/Cargo.toml0000644000000037420000000000100107730ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.71" name = "quinn-udp" version = "0.5.9" build = "build.rs" autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "UDP sockets with ECN information for the QUIC transport protocol" readme = false keywords = ["quic"] categories = [ "network-programming", "asynchronous", ] license = "MIT OR Apache-2.0" repository = "https://github.com/quinn-rs/quinn" [package.metadata.docs.rs] all-features = true [lib] name = "quinn_udp" path = "src/lib.rs" bench = false [[test]] name = "tests" path = "tests/tests.rs" [[bench]] name = "throughput" path = "benches/throughput.rs" harness = false [dependencies.libc] version = "0.2.158" [dependencies.log] version = "0.4" optional = true [dependencies.socket2] version = "0.5" [dependencies.tracing] version = "0.1.10" features = ["std"] optional = true default-features = false [dev-dependencies.criterion] version = "0.5" features = ["async_tokio"] default-features = false [dev-dependencies.tokio] version = "1.28.1" features = [ "sync", "rt", "rt-multi-thread", "net", ] [build-dependencies.cfg_aliases] version = "0.2" [features] default = [ "tracing", "log", ] direct-log = ["dep:log"] fast-apple-datapath = [] log = ["tracing/log"] 
[target."cfg(windows)".dependencies.once_cell] version = "1.19" [target."cfg(windows)".dependencies.windows-sys] version = ">=0.52, <=0.59" features = [ "Win32_Foundation", "Win32_System_IO", "Win32_Networking_WinSock", ] quinn-udp-0.5.9/Cargo.toml.orig000064400000000000000000000024531046102023000144520ustar 00000000000000[package] name = "quinn-udp" version = "0.5.9" edition.workspace = true rust-version.workspace = true license.workspace = true repository.workspace = true description = "UDP sockets with ECN information for the QUIC transport protocol" keywords.workspace = true categories.workspace = true workspace = ".." [features] default = ["tracing", "log"] # Configure `tracing` to log events via `log` if no `tracing` subscriber exists. log = ["tracing/log"] direct-log = ["dep:log"] # Use private Apple APIs to send multiple packets in a single syscall. fast-apple-datapath = [] [dependencies] libc = "0.2.158" log = { workspace = true, optional = true } socket2 = { workspace = true } tracing = { workspace = true, optional = true } [target.'cfg(windows)'.dependencies] once_cell = { workspace = true } windows-sys = { workspace = true } [dev-dependencies] criterion = { version = "0.5", default-features = false, features = ["async_tokio"] } tokio = { workspace = true, features = ["rt", "rt-multi-thread", "net"] } [build-dependencies] cfg_aliases = "0.2" [lib] # See https://github.com/bheisler/criterion.rs/blob/master/book/src/faq.md#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options bench = false [[bench]] name = "throughput" harness = false [package.metadata.docs.rs] all-features = true quinn-udp-0.5.9/LICENSE-APACHE000064400000000000000000000261351046102023000135120ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. quinn-udp-0.5.9/LICENSE-MIT000064400000000000000000000020501046102023000132100ustar 00000000000000Copyright (c) 2018 The quinn Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. quinn-udp-0.5.9/benches/throughput.rs000064400000000000000000000111321046102023000157430ustar 00000000000000use std::{ cmp::min, io::{ErrorKind, IoSliceMut}, net::{Ipv4Addr, Ipv6Addr, UdpSocket}, }; use criterion::{criterion_group, criterion_main, Criterion}; use tokio::{io::Interest, runtime::Runtime}; use quinn_udp::{RecvMeta, Transmit, UdpSocketState, BATCH_SIZE}; pub fn criterion_benchmark(c: &mut Criterion) { const TOTAL_BYTES: usize = 10 * 1024 * 1024; const SEGMENT_SIZE: usize = 1280; let rt = Runtime::new().unwrap(); let _guard = rt.enter(); let (send_state, send_socket) = new_socket(); let (recv_state, recv_socket) = new_socket(); let dst_addr = recv_socket.local_addr().unwrap(); let mut permutations = vec![]; for gso_enabled in [ false, #[cfg(any(target_os = "linux", target_os = "windows", apple))] true, ] { for gro_enabled in [false, true] { #[cfg(target_os = "windows")] if gso_enabled && !gro_enabled { // Windows requires receive buffer to fit entire datagram on GRO // enabled socket. // // OS error: "A message sent on a datagram socket was larger // than the internal message buffer or some other network limit, // or the buffer used to receive a datagram into was smaller // than the datagram itself." 
continue; } for recvmmsg_enabled in [false, true] { permutations.push((gso_enabled, gro_enabled, recvmmsg_enabled)); } } } for (gso_enabled, gro_enabled, recvmmsg_enabled) in permutations { let mut group = c.benchmark_group(format!( "gso_{}_gro_{}_recvmmsg_{}", gso_enabled, gro_enabled, recvmmsg_enabled )); group.throughput(criterion::Throughput::Bytes(TOTAL_BYTES as u64)); let gso_segments = if gso_enabled { send_state.max_gso_segments() } else { 1 }; let msg = vec![0xAB; min(MAX_DATAGRAM_SIZE, SEGMENT_SIZE * gso_segments)]; let transmit = Transmit { destination: dst_addr, ecn: None, contents: &msg, segment_size: gso_enabled.then_some(SEGMENT_SIZE), src_ip: None, }; let gro_segments = if gro_enabled { recv_state.gro_segments() } else { 1 }; let batch_size = if recvmmsg_enabled { BATCH_SIZE } else { 1 }; group.bench_function("throughput", |b| { b.to_async(&rt).iter(|| async { let mut receive_buffers = vec![vec![0; SEGMENT_SIZE * gro_segments]; batch_size]; let mut receive_slices = receive_buffers .iter_mut() .map(|buf| IoSliceMut::new(buf)) .collect::>(); let mut meta = vec![RecvMeta::default(); batch_size]; let mut sent: usize = 0; let mut received: usize = 0; while sent < TOTAL_BYTES { send_socket.writable().await.unwrap(); send_socket .try_io(Interest::WRITABLE, || { send_state.send((&send_socket).into(), &transmit) }) .unwrap(); sent += transmit.contents.len(); while received < sent { recv_socket.readable().await.unwrap(); let n = match recv_socket.try_io(Interest::READABLE, || { recv_state.recv((&recv_socket).into(), &mut receive_slices, &mut meta) }) { Ok(n) => n, // recv.readable() can lead to false positives. Try again. 
Err(e) if e.kind() == ErrorKind::WouldBlock => continue, e => e.unwrap(), }; received += meta.iter().map(|m| m.len).take(n).sum::(); } } }) }); } } fn new_socket() -> (UdpSocketState, tokio::net::UdpSocket) { let socket = UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)) .or_else(|_| UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) .unwrap(); ( UdpSocketState::new((&socket).into()).unwrap(), tokio::net::UdpSocket::from_std(socket).unwrap(), ) } criterion_group!(benches, criterion_benchmark); criterion_main!(benches); const MAX_IP_UDP_HEADER_SIZE: usize = 48; const MAX_DATAGRAM_SIZE: usize = u16::MAX as usize - MAX_IP_UDP_HEADER_SIZE; quinn-udp-0.5.9/build.rs000064400000000000000000000014471046102023000132320ustar 00000000000000use cfg_aliases::cfg_aliases; fn main() { // Setup cfg aliases cfg_aliases! { // Platforms apple: { any( target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "visionos" ) }, bsd: { any( target_os = "freebsd", target_os = "openbsd", target_os = "netbsd" ) }, solarish: { any( target_os = "solaris", target_os = "illumos" ) }, // Convenience aliases apple_fast: { all(apple, feature = "fast-apple-datapath") }, apple_slow: { all(apple, not(feature = "fast-apple-datapath")) }, } } quinn-udp-0.5.9/src/cmsg/mod.rs000064400000000000000000000106531046102023000144310ustar 00000000000000use std::{ ffi::{c_int, c_uchar}, mem, ptr, }; #[cfg(unix)] #[path = "unix.rs"] mod imp; #[cfg(windows)] #[path = "windows.rs"] mod imp; pub(crate) use imp::Aligned; /// Helper to encode a series of control messages (native "cmsgs") to a buffer for use in `sendmsg` // like API. /// /// The operation must be "finished" for the native msghdr to be usable, either by calling `finish` /// explicitly or by dropping the `Encoder`. 
/// Helper to encode a series of control messages (native "cmsgs") to a buffer for use in
/// `sendmsg`-like APIs.
///
/// The operation must be "finished" for the native msghdr to be usable, either by calling `finish`
/// explicitly or by dropping the `Encoder`.
pub(crate) struct Encoder<'a, M: MsgHdr> {
    /// Message header whose control buffer is being filled.
    hdr: &'a mut M,
    /// Next free control message slot, or `None` once the header reports no further slot.
    cmsg: Option<&'a mut M::ControlMessage>,
    /// Number of control buffer bytes consumed so far.
    len: usize,
}

impl<'a, M: MsgHdr> Encoder<'a, M> {
    /// # Safety
    /// - `hdr` must contain a suitably aligned pointer to a big enough buffer to hold control messages
    ///   bytes. All bytes of this buffer can be safely written.
    /// - The `Encoder` must be dropped before `hdr` is passed to a system call, and must not be leaked.
    pub(crate) unsafe fn new(hdr: &'a mut M) -> Self {
        Self {
            // Evaluated before `hdr` is moved into the struct.
            cmsg: hdr.cmsg_first_hdr().as_mut(),
            hdr,
            len: 0,
        }
    }

    /// Append a control message to the buffer.
    ///
    /// # Panics
    /// - If insufficient buffer space remains.
    /// - If `T` has stricter alignment requirements than `M::ControlMessage`
    pub(crate) fn push<T: Copy>(&mut self, level: c_int, ty: c_int, value: T) {
        assert!(mem::align_of::<T>() <= mem::align_of::<M::ControlMessage>());
        let space = M::ControlMessage::cmsg_space(mem::size_of_val(&value));
        assert!(
            self.hdr.control_len() >= self.len + space,
            "control message buffer too small. Required: {}, Available: {}",
            self.len + space,
            self.hdr.control_len()
        );
        let cmsg = self.cmsg.take().expect("no control buffer space remaining");
        cmsg.set(
            level,
            ty,
            M::ControlMessage::cmsg_len(mem::size_of_val(&value)),
        );
        // SAFETY: the caller of `Encoder::new` promised a writable control buffer, and the
        // assertions above checked that `space` more bytes fit and that `T` needs no stricter
        // alignment than the control message header.
        unsafe {
            ptr::write(cmsg.cmsg_data().cast::<T>(), value);
        }
        self.len += space;
        // SAFETY: the returned pointer is either null or points into the control buffer covered
        // by the contract of `Encoder::new`.
        self.cmsg = unsafe { self.hdr.cmsg_nxt_hdr(cmsg).as_mut() };
    }

    /// Finishes appending control messages to the buffer
    pub(crate) fn finish(self) {
        // Delegates to the `Drop` impl
    }
}

// Statically guarantees that the encoding operation is "finished" before the control buffer is read
// by `sendmsg` like API.
impl<M: MsgHdr> Drop for Encoder<'_, M> {
    fn drop(&mut self) {
        self.hdr.set_control_len(self.len as _);
    }
}

/// Reads the payload of `cmsg` as a value of type `T`.
///
/// # Safety
///
/// `cmsg` must refer to a native cmsg containing a payload of type `T`
///
/// # Panics
///
/// If `T` has stricter alignment requirements than `C`.
pub(crate) unsafe fn decode<T: Copy, C: CMsgHdr>(cmsg: &impl CMsgHdr) -> T {
    assert!(mem::align_of::<T>() <= mem::align_of::<C>());
    debug_assert_eq!(cmsg.len(), C::cmsg_len(mem::size_of::<T>()));
    ptr::read(cmsg.cmsg_data() as *const T)
}

/// Iterator over the control messages stored in the control buffer of a message header.
pub(crate) struct Iter<'a, M: MsgHdr> {
    hdr: &'a M,
    /// Message returned by the next call to `next`, if any.
    cmsg: Option<&'a M::ControlMessage>,
}

impl<'a, M: MsgHdr> Iter<'a, M> {
    /// # Safety
    ///
    /// `hdr` must hold a pointer to memory outliving `'a` which can be soundly read for the
    /// lifetime of the constructed `Iter` and contains a buffer of native cmsgs, i.e. is aligned
    /// for native `cmsghdr`, is fully initialized, and has correct internal links.
    pub(crate) unsafe fn new(hdr: &'a M) -> Self {
        Self {
            hdr,
            cmsg: hdr.cmsg_first_hdr().as_ref(),
        }
    }
}

impl<'a, M: MsgHdr> Iterator for Iter<'a, M> {
    type Item = &'a M::ControlMessage;

    fn next(&mut self) -> Option<Self::Item> {
        let current = self.cmsg.take()?;
        // SAFETY: `Iter::new` requires a well-formed cmsg buffer, so the successor pointer is
        // either null or refers to a readable control message.
        self.cmsg = unsafe { self.hdr.cmsg_nxt_hdr(current).as_ref() };
        Some(current)
    }
}

// Helper traits for native types for control messages
pub(crate) trait MsgHdr {
    type ControlMessage: CMsgHdr;

    /// Returns a pointer to the first control message slot, or null if there is none.
    fn cmsg_first_hdr(&self) -> *mut Self::ControlMessage;

    /// Returns a pointer to the control message slot following `cmsg`, or null if there is none.
    fn cmsg_nxt_hdr(&self, cmsg: &Self::ControlMessage) -> *mut Self::ControlMessage;

    /// Sets the length, in bytes, of the control data stored in this `struct msghdr`.
    ///
    /// Note that this is a destructive operation and should only be done as a finalisation
    /// step.
    fn set_control_len(&mut self, len: usize);

    /// Returns the length, in bytes, currently recorded for the control buffer.
    fn control_len(&self) -> usize;
}

pub(crate) trait CMsgHdr {
    /// Value to store in the header's length field for a payload of `length` bytes.
    fn cmsg_len(length: usize) -> usize;

    /// Number of control buffer bytes occupied by a message carrying `length` payload bytes.
    fn cmsg_space(length: usize) -> usize;

    /// Pointer to the payload of this control message.
    fn cmsg_data(&self) -> *mut c_uchar;

    /// Writes level, type and length into the header.
    fn set(&mut self, level: c_int, ty: c_int, len: usize);

    /// Length recorded in the header.
    fn len(&self) -> usize;
}
self.msg_control = std::ptr::null_mut(); } } fn control_len(&self) -> usize { self.msg_controllen as _ } } #[cfg(apple_fast)] impl MsgHdr for crate::imp::msghdr_x { type ControlMessage = libc::cmsghdr; fn cmsg_first_hdr(&self) -> *mut Self::ControlMessage { let selfp = self as *const _ as *mut libc::msghdr; unsafe { libc::CMSG_FIRSTHDR(selfp) } } fn cmsg_nxt_hdr(&self, cmsg: &Self::ControlMessage) -> *mut Self::ControlMessage { let selfp = self as *const _ as *mut libc::msghdr; unsafe { libc::CMSG_NXTHDR(selfp, cmsg) } } fn set_control_len(&mut self, len: usize) { self.msg_controllen = len as _; } fn control_len(&self) -> usize { self.msg_controllen as _ } } /// Helpers for [`libc::cmsghdr`] impl CMsgHdr for libc::cmsghdr { fn cmsg_len(length: usize) -> usize { unsafe { libc::CMSG_LEN(length as _) as usize } } fn cmsg_space(length: usize) -> usize { unsafe { libc::CMSG_SPACE(length as _) as usize } } fn cmsg_data(&self) -> *mut c_uchar { unsafe { libc::CMSG_DATA(self) } } fn set(&mut self, level: c_int, ty: c_int, len: usize) { self.cmsg_level = level as _; self.cmsg_type = ty as _; self.cmsg_len = len as _; } fn len(&self) -> usize { self.cmsg_len as _ } } quinn-udp-0.5.9/src/cmsg/windows.rs000064400000000000000000000053161046102023000153440ustar 00000000000000use std::{ ffi::{c_int, c_uchar}, mem, ptr, }; use windows_sys::Win32::Networking::WinSock; use super::{CMsgHdr, MsgHdr}; #[derive(Copy, Clone)] #[repr(align(8))] // Conservative bound for align_of pub(crate) struct Aligned(pub(crate) T); /// Helpers for [`WinSock::WSAMSG`] // https://learn.microsoft.com/en-us/windows/win32/api/ws2def/ns-ws2def-wsamsg // https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/Networking/WinSock/struct.WSAMSG.html impl MsgHdr for WinSock::WSAMSG { type ControlMessage = WinSock::CMSGHDR; fn cmsg_first_hdr(&self) -> *mut Self::ControlMessage { if self.Control.len as usize >= mem::size_of::() { self.Control.buf as *mut WinSock::CMSGHDR } else { ptr::null_mut::() } } fn 
cmsg_nxt_hdr(&self, cmsg: &Self::ControlMessage) -> *mut Self::ControlMessage { let next = (cmsg as *const _ as usize + cmsghdr_align(cmsg.cmsg_len)) as *mut WinSock::CMSGHDR; let max = self.Control.buf as usize + self.Control.len as usize; if unsafe { next.offset(1) } as usize > max { ptr::null_mut() } else { next } } fn set_control_len(&mut self, len: usize) { self.Control.len = len as _; } fn control_len(&self) -> usize { self.Control.len as _ } } /// Helpers for [`WinSock::CMSGHDR`] // https://learn.microsoft.com/en-us/windows/win32/api/ws2def/ns-ws2def-wsacmsghdr // https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/Networking/WinSock/struct.CMSGHDR.html impl CMsgHdr for WinSock::CMSGHDR { fn cmsg_len(length: usize) -> usize { cmsgdata_align(mem::size_of::()) + length } fn cmsg_space(length: usize) -> usize { cmsgdata_align(mem::size_of::() + cmsghdr_align(length)) } fn cmsg_data(&self) -> *mut c_uchar { (self as *const _ as usize + cmsgdata_align(mem::size_of::())) as *mut c_uchar } fn set(&mut self, level: c_int, ty: c_int, len: usize) { self.cmsg_level = level as _; self.cmsg_type = ty as _; self.cmsg_len = len as _; } fn len(&self) -> usize { self.cmsg_len as _ } } // Helpers functions for `WinSock::WSAMSG` and `WinSock::CMSGHDR` are based on C macros from // https://github.com/microsoft/win32metadata/blob/main/generation/WinSDK/RecompiledIdlHeaders/shared/ws2def.h#L741 fn cmsghdr_align(length: usize) -> usize { (length + mem::align_of::() - 1) & !(mem::align_of::() - 1) } fn cmsgdata_align(length: usize) -> usize { (length + mem::align_of::() - 1) & !(mem::align_of::() - 1) } quinn-udp-0.5.9/src/fallback.rs000064400000000000000000000064031046102023000144560ustar 00000000000000use std::{ io::{self, IoSliceMut}, sync::Mutex, time::Instant, }; use super::{log_sendmsg_error, RecvMeta, Transmit, UdpSockRef, IO_ERROR_LOG_INTERVAL}; /// Fallback UDP socket interface that stubs out all special functionality /// /// Used when a better implementation is 
not available for a particular target, at the cost of /// reduced performance compared to that enabled by some target-specific interfaces. #[derive(Debug)] pub struct UdpSocketState { last_send_error: Mutex, } impl UdpSocketState { pub fn new(socket: UdpSockRef<'_>) -> io::Result { socket.0.set_nonblocking(true)?; let now = Instant::now(); Ok(Self { last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)), }) } /// Sends a [`Transmit`] on the given socket. /// /// This function will only ever return errors of kind [`io::ErrorKind::WouldBlock`]. /// All other errors will be logged and converted to `Ok`. /// /// UDP transmission errors are considered non-fatal because higher-level protocols must /// employ retransmits and timeouts anyway in order to deal with UDP's unreliable nature. /// Thus, logging is most likely the only thing you can do with these errors. /// /// If you would like to handle these errors yourself, use [`UdpSocketState::try_send`] /// instead. pub fn send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { match send(socket, transmit) { Ok(()) => Ok(()), Err(e) if e.kind() == io::ErrorKind::WouldBlock => Err(e), Err(e) => { log_sendmsg_error(&self.last_send_error, e, transmit); Ok(()) } } } /// Sends a [`Transmit`] on the given socket without any additional error handling. pub fn try_send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { send(socket, transmit) } pub fn recv( &self, socket: UdpSockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta], ) -> io::Result { // Safety: both `IoSliceMut` and `MaybeUninitSlice` promise to have the // same layout, that of `iovec`/`WSABUF`. Furthermore `recv_vectored` // promises to not write unitialised bytes to the `bufs` and pass it // directly to the `recvmsg` system call, so this is safe. 
let bufs = unsafe { &mut *(bufs as *mut [IoSliceMut<'_>] as *mut [socket2::MaybeUninitSlice<'_>]) }; let (len, _flags, addr) = socket.0.recv_from_vectored(bufs)?; meta[0] = RecvMeta { len, stride: len, addr: addr.as_socket().unwrap(), ecn: None, dst_ip: None, }; Ok(1) } #[inline] pub fn max_gso_segments(&self) -> usize { 1 } #[inline] pub fn gro_segments(&self) -> usize { 1 } #[inline] pub fn may_fragment(&self) -> bool { true } } fn send(socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { socket.0.send_to( transmit.contents, &socket2::SockAddr::from(transmit.destination), ) } pub(crate) const BATCH_SIZE: usize = 1; quinn-udp-0.5.9/src/lib.rs000064400000000000000000000165551046102023000134760ustar 00000000000000//! Uniform interface to send and receive UDP packets with advanced features useful for QUIC //! //! This crate exposes kernel UDP stack features available on most modern systems which are required //! for an efficient and conformant QUIC implementation. As of this writing, these are not available //! in std or major async runtimes, and their niche character and complexity are a barrier to adding //! them. Hence, a dedicated crate. //! //! Exposed features include: //! //! - Segmentation offload for bulk send and receive operations, reducing CPU load. //! - Reporting the exact destination address of received packets and specifying explicit source //! addresses for sent packets, allowing responses to be sent from the address that the peer //! expects when there are multiple possibilities. This is common when bound to a wildcard address //! in IPv6 due to [RFC 8981] temporary addresses. //! - [Explicit Congestion Notification], which is required by QUIC to prevent packet loss and reduce //! latency on congested links when supported by the network path. //! - Disabled IP-layer fragmentation, which allows the true physical MTU to be detected and reduces //! risk of QUIC packet loss. //! //! Some features are unavailable in some environments. 
// Crate-internal logging facade. Exactly one set of `debug!`/`error!`/`info!`/`trace!`/`warn!`
// macros is re-exported, selected by the enabled cargo features.
#[allow(unused_imports, unused_macros)]
mod log {
    // `direct-log` is only used when `tracing` is disabled; with both enabled `tracing` wins.
    #[cfg(all(feature = "direct-log", not(feature = "tracing")))]
    pub(crate) use log::{debug, error, info, trace, warn};

    #[cfg(feature = "tracing")]
    pub(crate) use tracing::{debug, error, info, trace, warn};

    // With neither feature enabled every logging macro accepts any tokens and expands to an
    // empty block.
    #[cfg(not(any(feature = "direct-log", feature = "tracing")))]
    mod no_op {
        macro_rules! trace    ( ($($tt:tt)*) => {{}} );
        macro_rules! debug    ( ($($tt:tt)*) => {{}} );
        macro_rules! info     ( ($($tt:tt)*) => {{}} );
        // Defined under a different name and re-exported as `warn` below.
        // NOTE(review): presumably avoids a clash with the built-in `warn` attribute — confirm.
        macro_rules! log_warn ( ($($tt:tt)*) => {{}} );
        macro_rules! error    ( ($($tt:tt)*) => {{}} );

        pub(crate) use {debug, error, info, log_warn as warn, trace};
    }

    #[cfg(not(any(feature = "direct-log", feature = "tracing")))]
    pub(crate) use no_op::*;
}
/// /// [`stride`]: RecvMeta::stride #[derive(Debug, Copy, Clone)] pub struct RecvMeta { /// The source address of the datagram(s) contained in the buffer pub addr: SocketAddr, /// The number of bytes the associated buffer has pub len: usize, /// The size of a single datagram in the associated buffer /// /// When GRO (Generic Receive Offload) is used this indicates the size of a single /// datagram inside the buffer. If the buffer is larger, that is if [`len`] is greater /// then this value, then the individual datagrams contained have their boundaries at /// `stride` increments from the start. The last datagram could be smaller than /// `stride`. /// /// [`len`]: RecvMeta::len pub stride: usize, /// The Explicit Congestion Notification bits for the datagram(s) in the buffer pub ecn: Option, /// The destination IP address which was encoded in this datagram /// /// Populated on platforms: Windows, Linux, Android (API level > 25), /// FreeBSD, OpenBSD, NetBSD, macOS, and iOS. pub dst_ip: Option, } impl Default for RecvMeta { /// Constructs a value with arbitrary fields, intended to be overwritten fn default() -> Self { Self { addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0), len: 0, stride: 0, ecn: None, dst_ip: None, } } } /// An outgoing packet #[derive(Debug, Clone)] pub struct Transmit<'a> { /// The socket this datagram should be sent to pub destination: SocketAddr, /// Explicit congestion notification bits to set on the packet pub ecn: Option, /// Contents of the datagram pub contents: &'a [u8], /// The segment size if this transmission contains multiple datagrams. 
/// This is `None` if the transmit only contains a single datagram pub segment_size: Option, /// Optional source IP address for the datagram pub src_ip: Option, } /// Log at most 1 IO error per minute const IO_ERROR_LOG_INTERVAL: Duration = std::time::Duration::from_secs(60); /// Logs a warning message when sendmsg fails /// /// Logging will only be performed if at least [`IO_ERROR_LOG_INTERVAL`] /// has elapsed since the last error was logged. #[cfg(any(feature = "tracing", feature = "direct-log"))] fn log_sendmsg_error( last_send_error: &Mutex, err: impl core::fmt::Debug, transmit: &Transmit, ) { let now = Instant::now(); let last_send_error = &mut *last_send_error.lock().expect("poisend lock"); if now.saturating_duration_since(*last_send_error) > IO_ERROR_LOG_INTERVAL { *last_send_error = now; log::warn!( "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, ecn: {:?}, len: {:?}, segment_size: {:?} }}", err, transmit.destination, transmit.src_ip, transmit.ecn, transmit.contents.len(), transmit.segment_size); } } // No-op #[cfg(not(any(feature = "tracing", feature = "direct-log")))] fn log_sendmsg_error(_: &Mutex, _: impl core::fmt::Debug, _: &Transmit) {} /// A borrowed UDP socket /// /// On Unix, constructible via `From`. On Windows, constructible via `From`. 
// Wrapper around socket2 to avoid making it a public dependency and incurring stability risk pub struct UdpSockRef<'a>(socket2::SockRef<'a>); #[cfg(unix)] impl<'s, S> From<&'s S> for UdpSockRef<'s> where S: AsFd, { fn from(socket: &'s S) -> Self { Self(socket.into()) } } #[cfg(windows)] impl<'s, S> From<&'s S> for UdpSockRef<'s> where S: AsSocket, { fn from(socket: &'s S) -> Self { Self(socket.into()) } } /// Explicit congestion notification codepoint #[repr(u8)] #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum EcnCodepoint { #[doc(hidden)] Ect0 = 0b10, #[doc(hidden)] Ect1 = 0b01, #[doc(hidden)] Ce = 0b11, } impl EcnCodepoint { /// Create new object from the given bits pub fn from_bits(x: u8) -> Option { use self::EcnCodepoint::*; Some(match x & 0b11 { 0b10 => Ect0, 0b01 => Ect1, 0b11 => Ce, _ => { return None; } }) } } quinn-udp-0.5.9/src/unix.rs000064400000000000000000001014421046102023000137010ustar 00000000000000#[cfg(not(any(apple, target_os = "openbsd", solarish)))] use std::ptr; use std::{ io::{self, IoSliceMut}, mem::{self, MaybeUninit}, net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, os::unix::io::AsRawFd, sync::{ atomic::{AtomicBool, AtomicUsize, Ordering}, Mutex, }, time::Instant, }; use socket2::SockRef; use super::{ cmsg, log_sendmsg_error, EcnCodepoint, RecvMeta, Transmit, UdpSockRef, IO_ERROR_LOG_INTERVAL, }; // Adapted from https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/bsd/sys/socket_private.h #[cfg(apple_fast)] #[repr(C)] #[allow(non_camel_case_types)] pub(crate) struct msghdr_x { pub msg_name: *mut libc::c_void, pub msg_namelen: libc::socklen_t, pub msg_iov: *mut libc::iovec, pub msg_iovlen: libc::c_int, pub msg_control: *mut libc::c_void, pub msg_controllen: libc::socklen_t, pub msg_flags: libc::c_int, pub msg_datalen: usize, } #[cfg(apple_fast)] extern "C" { fn recvmsg_x( s: libc::c_int, msgp: *const msghdr_x, cnt: libc::c_uint, flags: libc::c_int, ) -> isize; fn 
sendmsg_x( s: libc::c_int, msgp: *const msghdr_x, cnt: libc::c_uint, flags: libc::c_int, ) -> isize; } // Defined in netinet6/in6.h on OpenBSD, this is not yet exported by the libc crate // directly. See https://github.com/rust-lang/libc/issues/3704 for when we might be able to // rely on this from the libc crate. #[cfg(any(target_os = "openbsd", target_os = "netbsd"))] const IPV6_DONTFRAG: libc::c_int = 62; #[cfg(not(any(target_os = "openbsd", target_os = "netbsd")))] const IPV6_DONTFRAG: libc::c_int = libc::IPV6_DONTFRAG; #[cfg(target_os = "freebsd")] type IpTosTy = libc::c_uchar; #[cfg(not(any(target_os = "freebsd", target_os = "netbsd")))] type IpTosTy = libc::c_int; /// Tokio-compatible UDP socket with some useful specializations. /// /// Unlike a standard tokio UDP socket, this allows ECN bits to be read and written on some /// platforms. #[derive(Debug)] pub struct UdpSocketState { last_send_error: Mutex, max_gso_segments: AtomicUsize, gro_segments: usize, may_fragment: bool, /// True if we have received EINVAL error from `sendmsg` system call at least once. /// /// If enabled, we assume that old kernel is used and switch to fallback mode. /// In particular, we do not use IP_TOS cmsg_type in this case, /// which is not supported on Linux <3.13 and results in not sending the UDP packet at all. 
sendmsg_einval: AtomicBool, } impl UdpSocketState { pub fn new(sock: UdpSockRef<'_>) -> io::Result { let io = sock.0; let mut cmsg_platform_space = 0; if cfg!(target_os = "linux") || cfg!(bsd) || cfg!(apple) || cfg!(target_os = "android") || cfg!(solarish) { cmsg_platform_space += unsafe { libc::CMSG_SPACE(mem::size_of::() as _) as usize }; } assert!( CMSG_LEN >= unsafe { libc::CMSG_SPACE(mem::size_of::() as _) as usize } + cmsg_platform_space ); assert!( mem::align_of::() <= mem::align_of::>(), "control message buffers will be misaligned" ); io.set_nonblocking(true)?; let addr = io.local_addr()?; let is_ipv4 = addr.family() == libc::AF_INET as libc::sa_family_t; // mac and ios do not support IP_RECVTOS on dual-stack sockets :( // older macos versions also don't have the flag and will error out if we don't ignore it #[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))] if is_ipv4 || !io.only_v6()? { if let Err(_err) = set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_RECVTOS, OPTION_ON) { crate::log::debug!("Ignoring error setting IP_RECVTOS on socket: {_err:?}"); } } let mut may_fragment = false; #[cfg(any(target_os = "linux", target_os = "android"))] { // opportunistically try to enable GRO. See gro::gro_segments(). let _ = set_socket_option(&*io, libc::SOL_UDP, gro::UDP_GRO, OPTION_ON); // Forbid IPv4 fragmentation. Set even for IPv6 to account for IPv6 mapped IPv4 addresses. // Set `may_fragment` to `true` if this option is not supported on the platform. may_fragment |= !set_socket_option_supported( &*io, libc::IPPROTO_IP, libc::IP_MTU_DISCOVER, libc::IP_PMTUDISC_PROBE, )?; if is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_PKTINFO, OPTION_ON)?; } else { // Set `may_fragment` to `true` if this option is not supported on the platform. 
may_fragment |= !set_socket_option_supported( &*io, libc::IPPROTO_IPV6, libc::IPV6_MTU_DISCOVER, libc::IPV6_PMTUDISC_PROBE, )?; } } #[cfg(any(target_os = "freebsd", apple))] { if is_ipv4 { // Set `may_fragment` to `true` if this option is not supported on the platform. may_fragment |= !set_socket_option_supported( &*io, libc::IPPROTO_IP, libc::IP_DONTFRAG, OPTION_ON, )?; } } #[cfg(any(bsd, apple, solarish))] // IP_RECVDSTADDR == IP_SENDSRCADDR on FreeBSD // macOS uses only IP_RECVDSTADDR, no IP_SENDSRCADDR on macOS (the same on Solaris) // macOS also supports IP_PKTINFO { if is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IP, libc::IP_RECVDSTADDR, OPTION_ON)?; } } // Options standardized in RFC 3542 if !is_ipv4 { set_socket_option(&*io, libc::IPPROTO_IPV6, libc::IPV6_RECVPKTINFO, OPTION_ON)?; set_socket_option(&*io, libc::IPPROTO_IPV6, libc::IPV6_RECVTCLASS, OPTION_ON)?; // Linux's IP_PMTUDISC_PROBE allows us to operate under interface MTU rather than the // kernel's path MTU guess, but actually disabling fragmentation requires this too. See // __ip6_append_data in ip6_output.c. // Set `may_fragment` to `true` if this option is not supported on the platform. may_fragment |= !set_socket_option_supported(&*io, libc::IPPROTO_IPV6, IPV6_DONTFRAG, OPTION_ON)?; } let now = Instant::now(); Ok(Self { last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)), max_gso_segments: AtomicUsize::new(gso::max_gso_segments()), gro_segments: gro::gro_segments(), may_fragment, sendmsg_einval: AtomicBool::new(false), }) } /// Sends a [`Transmit`] on the given socket. /// /// This function will only ever return errors of kind [`io::ErrorKind::WouldBlock`]. /// All other errors will be logged and converted to `Ok`. /// /// UDP transmission errors are considered non-fatal because higher-level protocols must /// employ retransmits and timeouts anyway in order to deal with UDP's unreliable nature. 
/// Thus, logging is most likely the only thing you can do with these errors. /// /// If you would like to handle these errors yourself, use [`UdpSocketState::try_send`] /// instead. pub fn send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { match send(self, socket.0, transmit) { Ok(()) => Ok(()), Err(e) if e.kind() == io::ErrorKind::WouldBlock => Err(e), Err(e) => { log_sendmsg_error(&self.last_send_error, e, transmit); Ok(()) } } } /// Sends a [`Transmit`] on the given socket without any additional error handling. pub fn try_send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { send(self, socket.0, transmit) } pub fn recv( &self, socket: UdpSockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta], ) -> io::Result { recv(socket.0, bufs, meta) } /// The maximum amount of segments which can be transmitted if a platform /// supports Generic Send Offload (GSO). /// /// This is 1 if the platform doesn't support GSO. Subject to change if errors are detected /// while using GSO. #[inline] pub fn max_gso_segments(&self) -> usize { self.max_gso_segments.load(Ordering::Relaxed) } /// The number of segments to read when GRO is enabled. Used as a factor to /// compute the receive buffer size. /// /// Returns 1 if the platform doesn't support GRO. #[inline] pub fn gro_segments(&self) -> usize { self.gro_segments } /// Whether transmitted datagrams might get fragmented by the IP layer /// /// Returns `false` on targets which employ e.g. the `IPV6_DONTFRAG` socket option. #[inline] pub fn may_fragment(&self) -> bool { self.may_fragment } /// Returns true if we previously got an EINVAL error from `sendmsg` syscall. fn sendmsg_einval(&self) -> bool { self.sendmsg_einval.load(Ordering::Relaxed) } /// Sets the flag indicating we got EINVAL error from `sendmsg` syscall. 
#[cfg(not(any(apple, target_os = "openbsd", target_os = "netbsd")))]
    fn set_sendmsg_einval(&self) {
        self.sendmsg_einval.store(true, Ordering::Relaxed)
    }
}

/// Sends `transmit` with a single `sendmsg` call.
///
/// Retries on `EINTR`. On Linux/Android an `EIO`/`EINVAL` failure turns GSO off for
/// future transmits; the first `EINVAL` additionally switches to the fallback control
/// message layout and retries once. `EMSGSIZE` is swallowed and reported as success.
#[cfg(not(any(apple, target_os = "openbsd", target_os = "netbsd")))]
fn send(
    #[allow(unused_variables)] // only used on Linux
    state: &UdpSocketState,
    io: SockRef<'_>,
    transmit: &Transmit<'_>,
) -> io::Result<()> {
    #[allow(unused_mut)] // only mutable on FreeBSD
    let mut encode_src_ip = true;
    #[cfg(target_os = "freebsd")]
    {
        let addr = io.local_addr()?;
        let is_ipv4 = addr.family() == libc::AF_INET as libc::sa_family_t;
        if is_ipv4 {
            if let Some(socket) = addr.as_socket_ipv4() {
                encode_src_ip = socket.ip() == &Ipv4Addr::UNSPECIFIED;
            }
        }
    }
    let mut msg_hdr: libc::msghdr = unsafe { mem::zeroed() };
    let mut iovec: libc::iovec = unsafe { mem::zeroed() };
    let mut cmsgs = cmsg::Aligned([0u8; CMSG_LEN]);
    let dst_addr = socket2::SockAddr::from(transmit.destination);
    prepare_msg(
        transmit,
        &dst_addr,
        &mut msg_hdr,
        &mut iovec,
        &mut cmsgs,
        encode_src_ip,
        state.sendmsg_einval(),
    );

    loop {
        let n = unsafe { libc::sendmsg(io.as_raw_fd(), &msg_hdr, 0) };
        if n == -1 {
            let e = io::Error::last_os_error();
            match e.kind() {
                io::ErrorKind::Interrupted => {
                    // Retry the transmission
                    continue;
                }
                io::ErrorKind::WouldBlock => return Err(e),
                _ => {
                    // Some network adapters and drivers do not support GSO. Unfortunately, Linux
                    // offers no easy way for us to detect this short of an EIO or sometimes EINVAL
                    // when we try to actually send datagrams using it.
                    #[cfg(any(target_os = "linux", target_os = "android"))]
                    if let Some(libc::EIO) | Some(libc::EINVAL) = e.raw_os_error() {
                        // Prevent new transmits from being scheduled using GSO. Existing GSO transmits
                        // may already be in the pipeline, so we need to tolerate additional failures.
                        if state.max_gso_segments() > 1 {
                            crate::log::info!(
                                "`libc::sendmsg` failed with {e}; halting segmentation offload"
                            );
                            state
                                .max_gso_segments
                                .store(1, std::sync::atomic::Ordering::Relaxed);
                        }
                    }

                    // Some arguments to `sendmsg` are not supported. Switch to
                    // fallback mode and retry if we haven't already.
                    if e.raw_os_error() == Some(libc::EINVAL) && !state.sendmsg_einval() {
                        state.set_sendmsg_einval();
                        prepare_msg(
                            transmit,
                            &dst_addr,
                            &mut msg_hdr,
                            &mut iovec,
                            &mut cmsgs,
                            encode_src_ip,
                            state.sendmsg_einval(),
                        );
                        continue;
                    }

                    // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid
                    //   these by automatically clamping the MTUD upper bound to the interface MTU.
                    if e.raw_os_error() != Some(libc::EMSGSIZE) {
                        return Err(e);
                    }
                }
            }
        }
        return Ok(());
    }
}

/// Sends `transmit` via `sendmsg_x`, splitting the payload into `segment_size` chunks
/// (at most `BATCH_SIZE`) that are each submitted as a separate message header.
#[cfg(apple_fast)]
fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> {
    let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() };
    let mut iovs = unsafe { mem::zeroed::<[libc::iovec; BATCH_SIZE]>() };
    let mut ctrls = [cmsg::Aligned([0u8; CMSG_LEN]); BATCH_SIZE];
    let addr = socket2::SockAddr::from(transmit.destination);
    let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len());
    let mut cnt = 0;
    debug_assert!(transmit.contents.len().div_ceil(segment_size) <= BATCH_SIZE);
    for (i, chunk) in transmit
        .contents
        .chunks(segment_size)
        .enumerate()
        .take(BATCH_SIZE)
    {
        prepare_msg(
            &Transmit {
                destination: transmit.destination,
                ecn: transmit.ecn,
                contents: chunk,
                segment_size: Some(chunk.len()),
                src_ip: transmit.src_ip,
            },
            &addr,
            &mut hdrs[i],
            &mut iovs[i],
            &mut ctrls[i],
            true,
            state.sendmsg_einval(),
        );
        hdrs[i].msg_datalen = chunk.len();
        cnt += 1;
    }
    loop {
        let n = unsafe { sendmsg_x(io.as_raw_fd(), hdrs.as_ptr(), cnt as u32, 0) };
        if n == -1 {
            let e = io::Error::last_os_error();
            match e.kind() {
                io::ErrorKind::Interrupted => {
                    // Retry the transmission
                    continue;
                }
                io::ErrorKind::WouldBlock => return Err(e),
                _ => {
                    // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid
                    //   these by automatically clamping the MTUD upper bound to the interface MTU.
                    if e.raw_os_error() != Some(libc::EMSGSIZE) {
                        return Err(e);
                    }
                }
            }
        }
        return Ok(());
    }
}

/// Sends `transmit` with a single plain `sendmsg` call, retrying on `EINTR` and
/// treating `EMSGSIZE` as success.
#[cfg(any(target_os = "openbsd", target_os = "netbsd", apple_slow))]
fn send(state: &UdpSocketState, io: SockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> {
    let mut hdr: libc::msghdr = unsafe { mem::zeroed() };
    let mut iov: libc::iovec = unsafe { mem::zeroed() };
    let mut ctrl = cmsg::Aligned([0u8; CMSG_LEN]);
    let addr = socket2::SockAddr::from(transmit.destination);
    prepare_msg(
        transmit,
        &addr,
        &mut hdr,
        &mut iov,
        &mut ctrl,
        cfg!(apple) || cfg!(target_os = "openbsd") || cfg!(target_os = "netbsd"),
        state.sendmsg_einval(),
    );
    loop {
        let n = unsafe { libc::sendmsg(io.as_raw_fd(), &hdr, 0) };
        if n == -1 {
            let e = io::Error::last_os_error();
            match e.kind() {
                io::ErrorKind::Interrupted => {
                    // Retry the transmission
                    continue;
                }
                io::ErrorKind::WouldBlock => return Err(e),
                _ => {
                    // - EMSGSIZE is expected for MTU probes. Future work might be able to avoid
                    //   these by automatically clamping the MTUD upper bound to the interface MTU.
                    if e.raw_os_error() != Some(libc::EMSGSIZE) {
                        return Err(e);
                    }
                }
            }
        }
        return Ok(());
    }
}

/// Receives up to `min(bufs.len(), BATCH_SIZE)` messages with one `recvmmsg` call.
///
/// Retries on `EINTR`. Returns the number of messages received; `meta[i]` is filled for
/// each of them, so `meta` must be at least that long or the indexing panics.
#[cfg(not(any(apple, target_os = "openbsd", target_os = "netbsd", solarish)))]
fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result<usize> {
    let mut names = [MaybeUninit::<libc::sockaddr_storage>::uninit(); BATCH_SIZE];
    let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE];
    let mut hdrs = unsafe { mem::zeroed::<[libc::mmsghdr; BATCH_SIZE]>() };
    let max_msg_count = bufs.len().min(BATCH_SIZE);
    for i in 0..max_msg_count {
        prepare_recv(
            &mut bufs[i],
            &mut names[i],
            &mut ctrls[i],
            &mut hdrs[i].msg_hdr,
        );
    }
    let msg_count = loop {
        let n = unsafe {
            libc::recvmmsg(
                io.as_raw_fd(),
                hdrs.as_mut_ptr(),
                // Same bound the headers were prepared with above.
                max_msg_count as _,
                0,
                ptr::null_mut::<libc::timespec>(),
            )
        };
        if n == -1 {
            let e = io::Error::last_os_error();
            if e.kind() == io::ErrorKind::Interrupted {
                continue;
            }
            return Err(e);
        }
        break n;
    };
    for i in 0..(msg_count as usize) {
        meta[i] = decode_recv(&names[i], &hdrs[i].msg_hdr, hdrs[i].msg_len as usize);
    }
    Ok(msg_count as usize)
}

/// Receives up to `min(bufs.len(), BATCH_SIZE)` messages with one `recvmsg_x` call,
/// retrying on `EINTR`, and fills `meta[i]` for each message received.
#[cfg(apple_fast)]
fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result<usize> {
    let mut names = [MaybeUninit::<libc::sockaddr_storage>::uninit(); BATCH_SIZE];
    let mut ctrls = [cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit()); BATCH_SIZE];
    let mut hdrs = unsafe { mem::zeroed::<[msghdr_x; BATCH_SIZE]>() };
    let max_msg_count = bufs.len().min(BATCH_SIZE);
    for i in 0..max_msg_count {
        prepare_recv(&mut bufs[i], &mut names[i], &mut ctrls[i], &mut hdrs[i]);
    }
    let msg_count = loop {
        let n = unsafe { recvmsg_x(io.as_raw_fd(), hdrs.as_mut_ptr(), max_msg_count as _, 0) };
        match n {
            -1 => {
                let e = io::Error::last_os_error();
                if e.kind() == io::ErrorKind::Interrupted {
                    continue;
                }
                return Err(e);
            }
            n => break n,
        }
    };
    for i in 0..(msg_count as usize) {
        meta[i] = decode_recv(&names[i], &hdrs[i], hdrs[i].msg_datalen as usize);
    }
    Ok(msg_count as usize)
}

#[cfg(any(target_os = "openbsd", target_os = "netbsd", solarish,
apple_slow))]
fn recv(io: SockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta]) -> io::Result<usize> {
    // Single-message path: only `bufs[0]` / `meta[0]` are used, so both slices must be
    // non-empty or the indexing below panics.
    let mut name = MaybeUninit::<libc::sockaddr_storage>::uninit();
    let mut ctrl = cmsg::Aligned(MaybeUninit::<[u8; CMSG_LEN]>::uninit());
    let mut hdr = unsafe { mem::zeroed::<libc::msghdr>() };

    prepare_recv(&mut bufs[0], &mut name, &mut ctrl, &mut hdr);

    let n = loop {
        let n = unsafe { libc::recvmsg(io.as_raw_fd(), &mut hdr, 0) };
        if n == -1 {
            let e = io::Error::last_os_error();
            if e.kind() == io::ErrorKind::Interrupted {
                continue;
            }
            return Err(e);
        }
        // Drop truncated datagrams and read the next one instead.
        if hdr.msg_flags & libc::MSG_TRUNC != 0 {
            continue;
        }
        break n;
    };
    meta[0] = decode_recv(&name, &hdr, n as usize);
    Ok(1)
}

/// Size in bytes of the control-message buffer used for both sending and receiving.
const CMSG_LEN: usize = 88;

/// Fills `hdr`, `iov` and `ctrl` so that `hdr` describes `transmit` addressed to `dst_addr`.
///
/// Control messages encoded: the ECN bits (skipped for IPv4 destinations when
/// `sendmsg_einval` is set), the GSO segment size when it differs from the payload length,
/// and the source address from `transmit.src_ip` (for IPv4 on BSD-like targets only when
/// `encode_src_ip` is true).
fn prepare_msg(
    transmit: &Transmit<'_>,
    dst_addr: &socket2::SockAddr,
    #[cfg(not(apple_fast))] hdr: &mut libc::msghdr,
    #[cfg(apple_fast)] hdr: &mut msghdr_x,
    iov: &mut libc::iovec,
    ctrl: &mut cmsg::Aligned<[u8; CMSG_LEN]>,
    #[allow(unused_variables)] // only used on FreeBSD & macOS
    encode_src_ip: bool,
    sendmsg_einval: bool,
) {
    iov.iov_base = transmit.contents.as_ptr() as *const _ as *mut _;
    iov.iov_len = transmit.contents.len();

    // SAFETY: Casting the pointer to a mutable one is legal,
    // as sendmsg is guaranteed to not alter the mutable pointer
    // as per the POSIX spec. See the section on the sys/socket.h
    // header for details. The type is only mutable in the first
    // place because it is reused by recvmsg as well.
    let name = dst_addr.as_ptr() as *mut libc::c_void;
    let namelen = dst_addr.len();
    hdr.msg_name = name as *mut _;
    hdr.msg_namelen = namelen;
    hdr.msg_iov = iov;
    hdr.msg_iovlen = 1;

    hdr.msg_control = ctrl.0.as_mut_ptr() as _;
    hdr.msg_controllen = CMSG_LEN as _;
    let mut encoder = unsafe { cmsg::Encoder::new(hdr) };
    let ecn = transmit.ecn.map_or(0, |x| x as libc::c_int);
    // True for IPv4 or IPv4-Mapped IPv6
    let is_ipv4 = transmit.destination.is_ipv4()
        || matches!(transmit.destination.ip(), IpAddr::V6(addr) if addr.to_ipv4_mapped().is_some());
    if is_ipv4 {
        if !sendmsg_einval {
            #[cfg(not(target_os = "netbsd"))]
            {
                encoder.push(libc::IPPROTO_IP, libc::IP_TOS, ecn as IpTosTy);
            }
        }
    } else {
        encoder.push(libc::IPPROTO_IPV6, libc::IPV6_TCLASS, ecn);
    }

    // Only set the segment size if it is different from the size of the contents.
    // Some network drivers don't like being told to do GSO even if there is effectively only a single segment.
    if let Some(segment_size) = transmit
        .segment_size
        .filter(|segment_size| *segment_size != transmit.contents.len())
    {
        gso::set_segment_size(&mut encoder, segment_size as u16);
    }

    if let Some(ip) = &transmit.src_ip {
        match ip {
            IpAddr::V4(v4) => {
                #[cfg(any(target_os = "linux", target_os = "android"))]
                {
                    let pktinfo = libc::in_pktinfo {
                        ipi_ifindex: 0,
                        ipi_spec_dst: libc::in_addr {
                            s_addr: u32::from_ne_bytes(v4.octets()),
                        },
                        ipi_addr: libc::in_addr { s_addr: 0 },
                    };
                    encoder.push(libc::IPPROTO_IP, libc::IP_PKTINFO, pktinfo);
                }
                #[cfg(any(bsd, apple, solarish))]
                {
                    if encode_src_ip {
                        let addr = libc::in_addr {
                            s_addr: u32::from_ne_bytes(v4.octets()),
                        };
                        encoder.push(libc::IPPROTO_IP, libc::IP_RECVDSTADDR, addr);
                    }
                }
            }
            IpAddr::V6(v6) => {
                let pktinfo = libc::in6_pktinfo {
                    ipi6_ifindex: 0,
                    ipi6_addr: libc::in6_addr {
                        s6_addr: v6.octets(),
                    },
                };
                encoder.push(libc::IPPROTO_IPV6, libc::IPV6_PKTINFO, pktinfo);
            }
        }
    }

    encoder.finish();
}

/// Points `hdr` at `buf`, `name` and `ctrl` so a receive call can fill them in.
#[cfg(not(apple_fast))]
fn prepare_recv(
    buf: &mut IoSliceMut,
    name: &mut MaybeUninit<libc::sockaddr_storage>,
    ctrl: &mut cmsg::Aligned<MaybeUninit<[u8; CMSG_LEN]>>,
    hdr: &mut libc::msghdr,
) {
    hdr.msg_name = name.as_mut_ptr() as _;
    hdr.msg_namelen = mem::size_of::<libc::sockaddr_storage>() as _;
    hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec;
    hdr.msg_iovlen = 1;
    hdr.msg_control = ctrl.0.as_mut_ptr() as _;
    hdr.msg_controllen = CMSG_LEN as _;
    hdr.msg_flags = 0;
}

/// Points `hdr` at `buf`, `name` and `ctrl` so `recvmsg_x` can fill them in; also
/// records the buffer length in `msg_datalen`.
#[cfg(apple_fast)]
fn prepare_recv(
    buf: &mut IoSliceMut,
    name: &mut MaybeUninit<libc::sockaddr_storage>,
    ctrl: &mut cmsg::Aligned<MaybeUninit<[u8; CMSG_LEN]>>,
    hdr: &mut msghdr_x,
) {
    hdr.msg_name = name.as_mut_ptr() as _;
    hdr.msg_namelen = mem::size_of::<libc::sockaddr_storage>() as _;
    hdr.msg_iov = buf as *mut IoSliceMut as *mut libc::iovec;
    hdr.msg_iovlen = 1;
    hdr.msg_control = ctrl.0.as_mut_ptr() as _;
    hdr.msg_controllen = CMSG_LEN as _;
    hdr.msg_flags = 0;
    hdr.msg_datalen = buf.len();
}

/// Builds the [`RecvMeta`] for one received message from its source address storage,
/// its control messages and its length `len`.
///
/// # Panics
///
/// Hits `unreachable!()` if the address family is neither `AF_INET` nor `AF_INET6`.
fn decode_recv(
    name: &MaybeUninit<libc::sockaddr_storage>,
    #[cfg(not(apple_fast))] hdr: &libc::msghdr,
    #[cfg(apple_fast)] hdr: &msghdr_x,
    len: usize,
) -> RecvMeta {
    let name = unsafe { name.assume_init() };
    let mut ecn_bits = 0;
    let mut dst_ip = None;
    #[allow(unused_mut)] // only mutable on Linux
    let mut stride = len;

    let cmsg_iter = unsafe { cmsg::Iter::new(hdr) };
    for cmsg in cmsg_iter {
        match (cmsg.cmsg_level, cmsg.cmsg_type) {
            (libc::IPPROTO_IP, libc::IP_TOS) => unsafe {
                ecn_bits = cmsg::decode::<u8, libc::cmsghdr>(cmsg);
            },
            // FreeBSD uses IP_RECVTOS here, and we can be liberal because cmsgs are opt-in.
            #[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))]
            (libc::IPPROTO_IP, libc::IP_RECVTOS) => unsafe {
                ecn_bits = cmsg::decode::<u8, libc::cmsghdr>(cmsg);
            },
            (libc::IPPROTO_IPV6, libc::IPV6_TCLASS) => unsafe {
                // Temporary hack around broken macos ABI. Remove once upstream fixes it.
                // https://bugreport.apple.com/web/?problemID=48761855
                #[allow(clippy::unnecessary_cast)] // cmsg.cmsg_len defined as size_t
                if cfg!(apple)
                    && cmsg.cmsg_len as usize == libc::CMSG_LEN(mem::size_of::<u8>() as _) as usize
                {
                    ecn_bits = cmsg::decode::<u8, libc::cmsghdr>(cmsg);
                } else {
                    ecn_bits = cmsg::decode::<libc::c_int, libc::cmsghdr>(cmsg) as u8;
                }
            },
            #[cfg(any(target_os = "linux", target_os = "android"))]
            (libc::IPPROTO_IP, libc::IP_PKTINFO) => {
                let pktinfo = unsafe { cmsg::decode::<libc::in_pktinfo, libc::cmsghdr>(cmsg) };
                dst_ip = Some(IpAddr::V4(Ipv4Addr::from(
                    pktinfo.ipi_addr.s_addr.to_ne_bytes(),
                )));
            }
            #[cfg(any(bsd, apple))]
            (libc::IPPROTO_IP, libc::IP_RECVDSTADDR) => {
                let in_addr = unsafe { cmsg::decode::<libc::in_addr, libc::cmsghdr>(cmsg) };
                dst_ip = Some(IpAddr::V4(Ipv4Addr::from(in_addr.s_addr.to_ne_bytes())));
            }
            (libc::IPPROTO_IPV6, libc::IPV6_PKTINFO) => {
                let pktinfo = unsafe { cmsg::decode::<libc::in6_pktinfo, libc::cmsghdr>(cmsg) };
                dst_ip = Some(IpAddr::V6(Ipv6Addr::from(pktinfo.ipi6_addr.s6_addr)));
            }
            #[cfg(any(target_os = "linux", target_os = "android"))]
            (libc::SOL_UDP, gro::UDP_GRO) => unsafe {
                stride = cmsg::decode::<libc::c_int, libc::cmsghdr>(cmsg) as usize;
            },
            _ => {}
        }
    }

    let addr = match libc::c_int::from(name.ss_family) {
        libc::AF_INET => {
            // Safety: if the ss_family field is AF_INET then storage must be a sockaddr_in.
            let addr: &libc::sockaddr_in =
                unsafe { &*(&name as *const _ as *const libc::sockaddr_in) };
            SocketAddr::V4(SocketAddrV4::new(
                Ipv4Addr::from(addr.sin_addr.s_addr.to_ne_bytes()),
                u16::from_be(addr.sin_port),
            ))
        }
        libc::AF_INET6 => {
            // Safety: if the ss_family field is AF_INET6 then storage must be a sockaddr_in6.
            let addr: &libc::sockaddr_in6 =
                unsafe { &*(&name as *const _ as *const libc::sockaddr_in6) };
            SocketAddr::V6(SocketAddrV6::new(
                Ipv6Addr::from(addr.sin6_addr.s6_addr),
                u16::from_be(addr.sin6_port),
                addr.sin6_flowinfo,
                addr.sin6_scope_id,
            ))
        }
        _ => unreachable!(),
    };

    RecvMeta {
        len,
        stride,
        addr,
        ecn: EcnCodepoint::from_bits(ecn_bits),
        dst_ip,
    }
}

#[cfg(not(apple_slow))]
// Chosen somewhat arbitrarily; might benefit from additional tuning.
pub(crate) const BATCH_SIZE: usize = 32; #[cfg(apple_slow)] pub(crate) const BATCH_SIZE: usize = 1; #[cfg(any(target_os = "linux", target_os = "android"))] mod gso { use super::*; #[cfg(not(target_os = "android"))] const UDP_SEGMENT: libc::c_int = libc::UDP_SEGMENT; #[cfg(target_os = "android")] // TODO: Add this to libc const UDP_SEGMENT: libc::c_int = 103; /// Checks whether GSO support is available by setting the UDP_SEGMENT /// option on a socket pub(crate) fn max_gso_segments() -> usize { const GSO_SIZE: libc::c_int = 1500; let socket = match std::net::UdpSocket::bind("[::]:0") .or_else(|_| std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) { Ok(socket) => socket, Err(_) => return 1, }; // As defined in linux/udp.h // #define UDP_MAX_SEGMENTS (1 << 6UL) match set_socket_option(&socket, libc::SOL_UDP, UDP_SEGMENT, GSO_SIZE) { Ok(()) => 64, Err(_e) => { crate::log::debug!( "failed to set `UDP_SEGMENT` socket option ({_e}); setting `max_gso_segments = 1`" ); 1 } } } pub(crate) fn set_segment_size(encoder: &mut cmsg::Encoder, segment_size: u16) { encoder.push(libc::SOL_UDP, UDP_SEGMENT, segment_size); } } // On Apple platforms using the `sendmsg_x` call, UDP datagram segmentation is not // offloaded to the NIC or even the kernel, but instead done here in user space in // [`send`]) and then passed to the OS as individual `iovec`s (up to `BATCH_SIZE`). 
#[cfg(not(any(target_os = "linux", target_os = "android")))] mod gso { use super::*; pub(super) fn max_gso_segments() -> usize { #[cfg(apple_fast)] { BATCH_SIZE } #[cfg(not(apple_fast))] { 1 } } pub(super) fn set_segment_size( #[cfg(not(apple_fast))] _encoder: &mut cmsg::Encoder, #[cfg(apple_fast)] _encoder: &mut cmsg::Encoder, _segment_size: u16, ) { } } #[cfg(any(target_os = "linux", target_os = "android"))] mod gro { use super::*; #[cfg(not(target_os = "android"))] pub(crate) const UDP_GRO: libc::c_int = libc::UDP_GRO; #[cfg(target_os = "android")] // TODO: Add this to libc pub(crate) const UDP_GRO: libc::c_int = 104; pub(crate) fn gro_segments() -> usize { let socket = match std::net::UdpSocket::bind("[::]:0") .or_else(|_| std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) { Ok(socket) => socket, Err(_) => return 1, }; // As defined in net/ipv4/udp_offload.c // #define UDP_GRO_CNT_MAX 64 // // NOTE: this MUST be set to UDP_GRO_CNT_MAX to ensure that the receive buffer size // (get_max_udp_payload_size() * gro_segments()) is large enough to hold the largest GRO // list the kernel might potentially produce. See // https://github.com/quinn-rs/quinn/pull/1354. match set_socket_option(&socket, libc::SOL_UDP, UDP_GRO, OPTION_ON) { Ok(()) => 64, Err(_) => 1, } } } /// Returns whether the given socket option is supported on the current platform /// /// Yields `Ok(true)` if the option was set successfully, `Ok(false)` if setting /// the option raised an `ENOPROTOOPT` error, and `Err` for any other error. 
fn set_socket_option_supported(
    socket: &impl AsRawFd,
    level: libc::c_int,
    name: libc::c_int,
    value: libc::c_int,
) -> io::Result<bool> {
    match set_socket_option(socket, level, name, value) {
        Ok(()) => Ok(true),
        Err(err) if err.raw_os_error() == Some(libc::ENOPROTOOPT) => Ok(false),
        Err(err) => Err(err),
    }
}

/// Sets the integer socket option `name` at `level` to `value` via `setsockopt`.
///
/// Returns the OS error reported by `setsockopt` when the call does not return 0.
fn set_socket_option(
    socket: &impl AsRawFd,
    level: libc::c_int,
    name: libc::c_int,
    value: libc::c_int,
) -> io::Result<()> {
    let rc = unsafe {
        libc::setsockopt(
            socket.as_raw_fd(),
            level,
            name,
            &value as *const _ as _,
            mem::size_of_val(&value) as _,
        )
    };

    if rc == 0 {
        Ok(())
    } else {
        Err(io::Error::last_os_error())
    }
}

/// Value passed to `setsockopt` to switch a boolean option on.
const OPTION_ON: libc::c_int = 1;

#[cfg(not(any(target_os = "linux", target_os = "android")))]
mod gro {
    /// GRO is not used on these targets, so a receive buffer holds a single segment.
    pub(super) fn gro_segments() -> usize {
        1
    }
}
quinn-udp-0.5.9/src/windows.rs000064400000000000000000000401741046102023000144140ustar 00000000000000
use std::{
    io::{self, IoSliceMut},
    mem,
    net::{IpAddr, Ipv4Addr},
    os::windows::io::AsRawSocket,
    ptr,
    sync::Mutex,
    time::Instant,
};

use libc::{c_int, c_uint};
use once_cell::sync::Lazy;
use windows_sys::Win32::Networking::WinSock;

use crate::{
    cmsg::{self, CMsgHdr},
    log::debug,
    log_sendmsg_error, EcnCodepoint, RecvMeta, Transmit, UdpSockRef, IO_ERROR_LOG_INTERVAL,
};

/// QUIC-friendly UDP socket for Windows
///
/// Unlike a standard Windows UDP socket, this allows ECN bits to be read and written.
#[derive(Debug)] pub struct UdpSocketState { last_send_error: Mutex, } impl UdpSocketState { pub fn new(socket: UdpSockRef<'_>) -> io::Result { assert!( CMSG_LEN >= WinSock::CMSGHDR::cmsg_space(mem::size_of::()) + WinSock::CMSGHDR::cmsg_space(mem::size_of::()) + WinSock::CMSGHDR::cmsg_space(mem::size_of::()) ); assert!( mem::align_of::() <= mem::align_of::>(), "control message buffers will be misaligned" ); socket.0.set_nonblocking(true)?; let addr = socket.0.local_addr()?; let is_ipv6 = addr.as_socket_ipv6().is_some(); let v6only = unsafe { let mut result: u32 = 0; let mut len = mem::size_of_val(&result) as i32; let rc = WinSock::getsockopt( socket.0.as_raw_socket() as _, WinSock::IPPROTO_IPV6, WinSock::IPV6_V6ONLY as _, &mut result as *mut _ as _, &mut len, ); if rc == -1 { return Err(io::Error::last_os_error()); } result != 0 }; let is_ipv4 = addr.as_socket_ipv4().is_some() || !v6only; // We don't support old versions of Windows that do not enable access to `WSARecvMsg()` if WSARECVMSG_PTR.is_none() { return Err(io::Error::new( io::ErrorKind::Unsupported, "network stack does not support WSARecvMsg function", )); } if is_ipv4 { set_socket_option( &*socket.0, WinSock::IPPROTO_IP, WinSock::IP_DONTFRAGMENT, OPTION_ON, )?; set_socket_option( &*socket.0, WinSock::IPPROTO_IP, WinSock::IP_PKTINFO, OPTION_ON, )?; set_socket_option(&*socket.0, WinSock::IPPROTO_IP, WinSock::IP_ECN, OPTION_ON)?; } if is_ipv6 { set_socket_option( &*socket.0, WinSock::IPPROTO_IPV6, WinSock::IPV6_DONTFRAG, OPTION_ON, )?; set_socket_option( &*socket.0, WinSock::IPPROTO_IPV6, WinSock::IPV6_PKTINFO, OPTION_ON, )?; set_socket_option( &*socket.0, WinSock::IPPROTO_IPV6, WinSock::IPV6_ECN, OPTION_ON, )?; } let now = Instant::now(); Ok(Self { last_send_error: Mutex::new(now.checked_sub(2 * IO_ERROR_LOG_INTERVAL).unwrap_or(now)), }) } /// Enable or disable receive offloading. /// /// Also referred to as UDP Receive Segment Coalescing Offload (URO) on Windows. 
/// /// /// /// Disabled by default on Windows due to . pub fn set_gro(&self, socket: UdpSockRef<'_>, enable: bool) -> io::Result<()> { set_socket_option( &*socket.0, WinSock::IPPROTO_UDP, WinSock::UDP_RECV_MAX_COALESCED_SIZE, match enable { // u32 per // https://learn.microsoft.com/en-us/windows/win32/winsock/ipproto-udp-socket-options. // Choice of 2^16 - 1 inspired by msquic. true => u16::MAX as u32, false => 0, }, ) } /// Sends a [`Transmit`] on the given socket. /// /// This function will only ever return errors of kind [`io::ErrorKind::WouldBlock`]. /// All other errors will be logged and converted to `Ok`. /// /// UDP transmission errors are considered non-fatal because higher-level protocols must /// employ retransmits and timeouts anyway in order to deal with UDP's unreliable nature. /// Thus, logging is most likely the only thing you can do with these errors. /// /// If you would like to handle these errors yourself, use [`UdpSocketState::try_send`] /// instead. pub fn send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { match send(socket, transmit) { Ok(()) => Ok(()), Err(e) if e.kind() == io::ErrorKind::WouldBlock => Err(e), Err(e) => { log_sendmsg_error(&self.last_send_error, e, transmit); Ok(()) } } } /// Sends a [`Transmit`] on the given socket without any additional error handling. 
pub fn try_send(&self, socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { send(socket, transmit) } pub fn recv( &self, socket: UdpSockRef<'_>, bufs: &mut [IoSliceMut<'_>], meta: &mut [RecvMeta], ) -> io::Result { let wsa_recvmsg_ptr = WSARECVMSG_PTR.expect("valid function pointer for WSARecvMsg"); // we cannot use [`socket2::MsgHdrMut`] as we do not have access to inner field which holds the WSAMSG let mut ctrl_buf = cmsg::Aligned([0; CMSG_LEN]); let mut source: WinSock::SOCKADDR_INET = unsafe { mem::zeroed() }; let mut data = WinSock::WSABUF { buf: bufs[0].as_mut_ptr(), len: bufs[0].len() as _, }; let ctrl = WinSock::WSABUF { buf: ctrl_buf.0.as_mut_ptr(), len: ctrl_buf.0.len() as _, }; let mut wsa_msg = WinSock::WSAMSG { name: &mut source as *mut _ as *mut _, namelen: mem::size_of_val(&source) as _, lpBuffers: &mut data, Control: ctrl, dwBufferCount: 1, dwFlags: 0, }; let mut len = 0; unsafe { let rc = (wsa_recvmsg_ptr)( socket.0.as_raw_socket() as usize, &mut wsa_msg, &mut len, ptr::null_mut(), None, ); if rc == -1 { return Err(io::Error::last_os_error()); } } let addr = unsafe { let (_, addr) = socket2::SockAddr::try_init(|addr_storage, len| { *len = mem::size_of_val(&source) as _; ptr::copy_nonoverlapping(&source, addr_storage as _, 1); Ok(()) })?; addr.as_socket() }; // Decode control messages (PKTINFO and ECN) let mut ecn_bits = 0; let mut dst_ip = None; let mut stride = len; let cmsg_iter = unsafe { cmsg::Iter::new(&wsa_msg) }; for cmsg in cmsg_iter { const UDP_COALESCED_INFO: i32 = WinSock::UDP_COALESCED_INFO as i32; // [header (len)][data][padding(len + sizeof(data))] -> [header][data][padding] match (cmsg.cmsg_level, cmsg.cmsg_type) { (WinSock::IPPROTO_IP, WinSock::IP_PKTINFO) => { let pktinfo = unsafe { cmsg::decode::(cmsg) }; // Addr is stored in big endian format let ip4 = Ipv4Addr::from(u32::from_be(unsafe { pktinfo.ipi_addr.S_un.S_addr })); dst_ip = Some(ip4.into()); } (WinSock::IPPROTO_IPV6, WinSock::IPV6_PKTINFO) => { let pktinfo 
= unsafe { cmsg::decode::(cmsg) }; // Addr is stored in big endian format dst_ip = Some(IpAddr::from(unsafe { pktinfo.ipi6_addr.u.Byte })); } (WinSock::IPPROTO_IP, WinSock::IP_ECN) => { // ECN is a C integer https://learn.microsoft.com/en-us/windows/win32/winsock/winsock-ecn ecn_bits = unsafe { cmsg::decode::(cmsg) }; } (WinSock::IPPROTO_IPV6, WinSock::IPV6_ECN) => { // ECN is a C integer https://learn.microsoft.com/en-us/windows/win32/winsock/winsock-ecn ecn_bits = unsafe { cmsg::decode::(cmsg) }; } (WinSock::IPPROTO_UDP, UDP_COALESCED_INFO) => { // Has type u32 (aka DWORD) per // https://learn.microsoft.com/en-us/windows/win32/winsock/ipproto-udp-socket-options stride = unsafe { cmsg::decode::(cmsg) }; } _ => {} } } meta[0] = RecvMeta { len: len as usize, stride: stride as usize, addr: addr.unwrap(), ecn: EcnCodepoint::from_bits(ecn_bits as u8), dst_ip, }; Ok(1) } /// The maximum amount of segments which can be transmitted if a platform /// supports Generic Send Offload (GSO). /// /// This is 1 if the platform doesn't support GSO. Subject to change if errors are detected /// while using GSO. #[inline] pub fn max_gso_segments(&self) -> usize { *MAX_GSO_SEGMENTS } /// The number of segments to read when GRO is enabled. Used as a factor to /// compute the receive buffer size. /// /// Returns 1 if the platform doesn't support GRO. 
#[inline] pub fn gro_segments(&self) -> usize { // Arbitrary reasonable value inspired by Linux and msquic 64 } #[inline] pub fn may_fragment(&self) -> bool { false } } fn send(socket: UdpSockRef<'_>, transmit: &Transmit<'_>) -> io::Result<()> { // we cannot use [`socket2::sendmsg()`] and [`socket2::MsgHdr`] as we do not have access // to the inner field which holds the WSAMSG let mut ctrl_buf = cmsg::Aligned([0; CMSG_LEN]); let daddr = socket2::SockAddr::from(transmit.destination); let mut data = WinSock::WSABUF { buf: transmit.contents.as_ptr() as *mut _, len: transmit.contents.len() as _, }; let ctrl = WinSock::WSABUF { buf: ctrl_buf.0.as_mut_ptr(), len: ctrl_buf.0.len() as _, }; let mut wsa_msg = WinSock::WSAMSG { name: daddr.as_ptr() as *mut _, namelen: daddr.len(), lpBuffers: &mut data, Control: ctrl, dwBufferCount: 1, dwFlags: 0, }; // Add control messages (ECN and PKTINFO) let mut encoder = unsafe { cmsg::Encoder::new(&mut wsa_msg) }; if let Some(ip) = transmit.src_ip { let ip = std::net::SocketAddr::new(ip, 0); let ip = socket2::SockAddr::from(ip); match ip.family() { WinSock::AF_INET => { let src_ip = unsafe { ptr::read(ip.as_ptr() as *const WinSock::SOCKADDR_IN) }; let pktinfo = WinSock::IN_PKTINFO { ipi_addr: src_ip.sin_addr, ipi_ifindex: 0, }; encoder.push(WinSock::IPPROTO_IP, WinSock::IP_PKTINFO, pktinfo); } WinSock::AF_INET6 => { let src_ip = unsafe { ptr::read(ip.as_ptr() as *const WinSock::SOCKADDR_IN6) }; let pktinfo = WinSock::IN6_PKTINFO { ipi6_addr: src_ip.sin6_addr, ipi6_ifindex: unsafe { src_ip.Anonymous.sin6_scope_id }, }; encoder.push(WinSock::IPPROTO_IPV6, WinSock::IPV6_PKTINFO, pktinfo); } _ => { return Err(io::Error::from(io::ErrorKind::InvalidInput)); } } } // ECN is a C integer https://learn.microsoft.com/en-us/windows/win32/winsock/winsock-ecn let ecn = transmit.ecn.map_or(0, |x| x as c_int); // True for IPv4 or IPv4-Mapped IPv6 let is_ipv4 = transmit.destination.is_ipv4() || matches!(transmit.destination.ip(), IpAddr::V6(addr) if 
addr.to_ipv4_mapped().is_some()); if is_ipv4 { encoder.push(WinSock::IPPROTO_IP, WinSock::IP_ECN, ecn); } else { encoder.push(WinSock::IPPROTO_IPV6, WinSock::IPV6_ECN, ecn); } // Segment size is a u32 https://learn.microsoft.com/en-us/windows/win32/api/ws2tcpip/nf-ws2tcpip-wsasetudpsendmessagesize if let Some(segment_size) = transmit.segment_size { encoder.push( WinSock::IPPROTO_UDP, WinSock::UDP_SEND_MSG_SIZE, segment_size as u32, ); } encoder.finish(); let mut len = 0; let rc = unsafe { WinSock::WSASendMsg( socket.0.as_raw_socket() as usize, &wsa_msg, 0, &mut len, ptr::null_mut(), None, ) }; match rc { 0 => Ok(()), _ => Err(io::Error::last_os_error()), } } fn set_socket_option( socket: &impl AsRawSocket, level: i32, name: i32, value: u32, ) -> io::Result<()> { let rc = unsafe { WinSock::setsockopt( socket.as_raw_socket() as usize, level, name, &value as *const _ as _, mem::size_of_val(&value) as _, ) }; match rc == 0 { true => Ok(()), false => Err(io::Error::last_os_error()), } } pub(crate) const BATCH_SIZE: usize = 1; // Enough to store max(IP_PKTINFO + IP_ECN, IPV6_PKTINFO + IPV6_ECN) + max(UDP_SEND_MSG_SIZE, UDP_COALESCED_INFO) bytes (header + data) and some extra margin const CMSG_LEN: usize = 128; const OPTION_ON: u32 = 1; // FIXME this could use [`std::sync::OnceLock`] once the MSRV is bumped to 1.70 and upper static WSARECVMSG_PTR: Lazy = Lazy::new(|| { let s = unsafe { WinSock::socket(WinSock::AF_INET as _, WinSock::SOCK_DGRAM as _, 0) }; if s == WinSock::INVALID_SOCKET { debug!( "ignoring WSARecvMsg function pointer due to socket creation error: {}", io::Error::last_os_error() ); return None; } // Detect if OS expose WSARecvMsg API based on // https://github.com/Azure/mio-uds-windows/blob/a3c97df82018086add96d8821edb4aa85ec1b42b/src/stdnet/ext.rs#L601 let guid = WinSock::WSAID_WSARECVMSG; let mut wsa_recvmsg_ptr = None; let mut len = 0; // Safety: Option handles the NULL pointer with a None value let rc = unsafe { WinSock::WSAIoctl( s as _, 
WinSock::SIO_GET_EXTENSION_FUNCTION_POINTER, &guid as *const _ as *const _, mem::size_of_val(&guid) as u32, &mut wsa_recvmsg_ptr as *mut _ as *mut _, mem::size_of_val(&wsa_recvmsg_ptr) as u32, &mut len, ptr::null_mut(), None, ) }; if rc == -1 { debug!( "ignoring WSARecvMsg function pointer due to ioctl error: {}", io::Error::last_os_error() ); } else if len as usize != mem::size_of::() { debug!("ignoring WSARecvMsg function pointer due to pointer size mismatch"); wsa_recvmsg_ptr = None; } unsafe { WinSock::closesocket(s); } wsa_recvmsg_ptr }); static MAX_GSO_SEGMENTS: Lazy = Lazy::new(|| { let socket = match std::net::UdpSocket::bind("[::]:0") .or_else(|_| std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) { Ok(socket) => socket, Err(_) => return 1, }; const GSO_SIZE: c_uint = 1500; match set_socket_option( &socket, WinSock::IPPROTO_UDP, WinSock::UDP_SEND_MSG_SIZE, GSO_SIZE, ) { // Empirically found on Windows 11 x64 Ok(()) => 512, Err(_) => 1, } }); quinn-udp-0.5.9/tests/tests.rs000064400000000000000000000214311046102023000144320ustar 00000000000000#[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))] use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::{ io::IoSliceMut, net::{IpAddr, Ipv4Addr, Ipv6Addr, UdpSocket}, slice, }; use quinn_udp::{EcnCodepoint, RecvMeta, Transmit, UdpSocketState}; use socket2::Socket; #[test] fn basic() { let send = UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)) .or_else(|_| UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) .unwrap(); let recv = UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)) .or_else(|_| UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) .unwrap(); let dst_addr = recv.local_addr().unwrap(); test_send_recv( &send.into(), &recv.into(), Transmit { destination: dst_addr, ecn: None, contents: b"hello", segment_size: None, src_ip: None, }, ); } #[test] fn ecn_v6() { let send = Socket::from(UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)).unwrap()); let recv = Socket::from(UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)).unwrap()); 
for codepoint in [EcnCodepoint::Ect0, EcnCodepoint::Ect1] { test_send_recv( &send, &recv, Transmit { destination: recv.local_addr().unwrap().as_socket().unwrap(), ecn: Some(codepoint), contents: b"hello", segment_size: None, src_ip: None, }, ); } } #[test] #[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))] fn ecn_v4() { let send = Socket::from(UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).unwrap()); let recv = Socket::from(UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).unwrap()); for codepoint in [EcnCodepoint::Ect0, EcnCodepoint::Ect1] { test_send_recv( &send, &recv, Transmit { destination: recv.local_addr().unwrap().as_socket().unwrap(), ecn: Some(codepoint), contents: b"hello", segment_size: None, src_ip: None, }, ); } } #[test] #[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))] fn ecn_v6_dualstack() { let recv = socket2::Socket::new( socket2::Domain::IPV6, socket2::Type::DGRAM, Some(socket2::Protocol::UDP), ) .unwrap(); recv.set_only_v6(false).unwrap(); // We must use the unspecified address here, rather than a local address, to support dual-stack // mode recv.bind(&socket2::SockAddr::from( "[::]:0".parse::().unwrap(), )) .unwrap(); let recv_v6 = SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, recv.local_addr().unwrap().as_socket().unwrap().port(), 0, 0, )); let recv_v4 = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, recv_v6.port())); for (src, dst) in [ (SocketAddr::new(IpAddr::V6(Ipv6Addr::LOCALHOST), 0), recv_v6), (SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0), recv_v4), ] { dbg!(src, dst); let send = UdpSocket::bind(src).unwrap(); let send = Socket::from(send); for codepoint in [EcnCodepoint::Ect0, EcnCodepoint::Ect1] { test_send_recv( &send, &recv, Transmit { destination: dst, ecn: Some(codepoint), contents: b"hello", segment_size: None, src_ip: None, }, ); } } } #[test] #[cfg(not(any(target_os = "openbsd", target_os = "netbsd", solarish)))] fn ecn_v4_mapped_v6() { let send = socket2::Socket::new( 
socket2::Domain::IPV6, socket2::Type::DGRAM, Some(socket2::Protocol::UDP), ) .unwrap(); send.set_only_v6(false).unwrap(); send.bind(&socket2::SockAddr::from( "[::]:0".parse::().unwrap(), )) .unwrap(); let recv = UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).unwrap(); let recv = Socket::from(recv); let recv_v4_mapped_v6 = SocketAddr::V6(SocketAddrV6::new( Ipv4Addr::LOCALHOST.to_ipv6_mapped(), recv.local_addr().unwrap().as_socket().unwrap().port(), 0, 0, )); for codepoint in [EcnCodepoint::Ect0, EcnCodepoint::Ect1] { test_send_recv( &send, &recv, Transmit { destination: recv_v4_mapped_v6, ecn: Some(codepoint), contents: b"hello", segment_size: None, src_ip: None, }, ); } } #[test] #[cfg_attr( not(any(target_os = "linux", target_os = "windows", target_os = "android")), ignore )] fn gso() { let send = UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)) .or_else(|_| UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) .unwrap(); let recv = UdpSocket::bind((Ipv6Addr::LOCALHOST, 0)) .or_else(|_| UdpSocket::bind((Ipv4Addr::LOCALHOST, 0))) .unwrap(); let max_segments = UdpSocketState::new((&send).into()) .unwrap() .max_gso_segments(); let dst_addr = recv.local_addr().unwrap(); const SEGMENT_SIZE: usize = 128; let msg = vec![0xAB; SEGMENT_SIZE * max_segments]; test_send_recv( &send.into(), &recv.into(), Transmit { destination: dst_addr, ecn: None, contents: &msg, segment_size: Some(SEGMENT_SIZE), src_ip: None, }, ); } fn test_send_recv(send: &Socket, recv: &Socket, transmit: Transmit) { let send_state = UdpSocketState::new(send.into()).unwrap(); let recv_state = UdpSocketState::new(recv.into()).unwrap(); // Reverse non-blocking flag set by `UdpSocketState` to make the test non-racy recv.set_nonblocking(false).unwrap(); send_state.try_send(send.into(), &transmit).unwrap(); let mut buf = [0; u16::MAX as usize]; let mut meta = RecvMeta::default(); let segment_size = transmit.segment_size.unwrap_or(transmit.contents.len()); let expected_datagrams = transmit.contents.len() / segment_size; let mut 
datagrams = 0; while datagrams < expected_datagrams { let n = recv_state .recv( recv.into(), &mut [IoSliceMut::new(&mut buf)], slice::from_mut(&mut meta), ) .unwrap(); assert_eq!(n, 1); let segments = meta.len / meta.stride; for i in 0..segments { assert_eq!( &buf[(i * meta.stride)..((i + 1) * meta.stride)], &transmit.contents [(datagrams + i) * segment_size..(datagrams + i + 1) * segment_size] ); } datagrams += segments; assert_eq!( meta.addr.port(), send.local_addr().unwrap().as_socket().unwrap().port() ); let send_v6 = send.local_addr().unwrap().as_socket().unwrap().is_ipv6(); let recv_v6 = recv.local_addr().unwrap().as_socket().unwrap().is_ipv6(); let mut addresses = vec![meta.addr.ip()]; // Not populated on every OS. See `RecvMeta::dst_ip` for details. if let Some(addr) = meta.dst_ip { addresses.push(addr); } for addr in addresses { match (send_v6, recv_v6) { (_, false) => assert_eq!(addr, Ipv4Addr::LOCALHOST), // Windows gives us real IPv4 addrs, whereas *nix use IPv6-mapped IPv4 // addrs. Canonicalize to IPv6-mapped for robustness. (false, true) => { assert_eq!(ip_to_v6_mapped(addr), Ipv4Addr::LOCALHOST.to_ipv6_mapped()) } (true, true) => assert!( addr == Ipv6Addr::LOCALHOST || addr == Ipv4Addr::LOCALHOST.to_ipv6_mapped() ), } } let ipv4_or_ipv4_mapped_ipv6 = match transmit.destination.ip() { IpAddr::V4(_) => true, IpAddr::V6(a) => a.to_ipv4_mapped().is_some(), }; // On Android API level <= 25 the IPv4 `IP_TOS` control message is // not supported and thus ECN bits can not be received. if ipv4_or_ipv4_mapped_ipv6 && cfg!(target_os = "android") && std::env::var("API_LEVEL") .ok() .and_then(|v| v.parse::().ok()) .expect("API_LEVEL environment variable to be set on Android") <= 25 { assert_eq!(meta.ecn, None); } else { assert_eq!(meta.ecn, transmit.ecn); } } assert_eq!(datagrams, expected_datagrams); } fn ip_to_v6_mapped(x: IpAddr) -> IpAddr { match x { IpAddr::V4(x) => IpAddr::V6(x.to_ipv6_mapped()), IpAddr::V6(_) => x, } }