pax_global_header00006660000000000000000000000064141777667260014541gustar00rootroot0000000000000052 comment=67a5bf29c76df6ec4f7a641ddfe598ca0159dfab rootlesskit-0.14.6/000077500000000000000000000000001417776672600142135ustar00rootroot00000000000000rootlesskit-0.14.6/.dockerignore000066400000000000000000000000431417776672600166640ustar00rootroot00000000000000Dockerfile .github bin/ _artifact/ rootlesskit-0.14.6/.github/000077500000000000000000000000001417776672600155535ustar00rootroot00000000000000rootlesskit-0.14.6/.github/dependabot.yml000066400000000000000000000002341417776672600204020ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: gomod directory: "/" schedule: interval: daily open-pull-requests-limit: 10 reviewers: - AkihiroSuda rootlesskit-0.14.6/.github/workflows/000077500000000000000000000000001417776672600176105ustar00rootroot00000000000000rootlesskit-0.14.6/.github/workflows/main.yaml000066400000000000000000000156171417776672600214320ustar00rootroot00000000000000name: Main on: [push, pull_request] jobs: test-unit: name: "Unit test" runs-on: ubuntu-latest steps: - name: "Check out" uses: actions/checkout@v2 - name: "Build unit test image" run: DOCKER_BUILDKIT=1 docker build -t rootlesskit:test-unit --target test-unit . - name: "Unit test" run: docker run --rm --privileged rootlesskit:test-unit test-integration: name: "Integration test" runs-on: ubuntu-latest steps: - name: "Check out" uses: actions/checkout@v2 - name: "Build integration test image" run: DOCKER_BUILDKIT=1 docker build -t rootlesskit:test-integration --target test-integration . 
- name: "Integration test: exit-code" run: docker run --rm --privileged rootlesskit:test-integration ./integration-exit-code.sh - name: "Integration test: propagation" run: docker run --rm --privileged rootlesskit:test-integration ./integration-propagation.sh - name: "Integration test: propagation (with `mount --make-rshared /`)" run: docker run --rm --privileged rootlesskit:test-integration sh -exc "sudo mount --make-rshared / && ./integration-propagation.sh" - name: "Integration test: restart" run: docker run --rm --privileged rootlesskit:test-integration ./integration-restart.sh - name: "Integration test: port" # NOTE: "--net=host" is a bad hack to enable IPv6 run: docker run --rm --net=host --privileged rootlesskit:test-integration ./integration-port.sh - name: "Integration test: IPv6 routing" run: docker run --rm --privileged --sysctl net.ipv6.conf.all.disable_ipv6=0 rootlesskit:test-integration ./integration-ipv6.sh # ===== Benchmark: Network (MTU=1500) ===== - name: "Benchmark: Network (MTU=1500, network driver=slirp4netns)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 1500 - name: "Benchmark: Network (MTU=1500, network driver=slirp4netns with sandbox and seccomp)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 1500 --slirp4netns-sandbox=auto --slirp4netns-seccomp=auto # NOTE: MTU greater than 16424 is known not to work for VPNKit. 
# Also, MTU greather than 4K might not be effective for VPNKit: https://twitter.com/mugofsoup/status/1017665057738641408 - name: "Benchmark: Network (MTU=1500, network driver=vpnkit)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-net.sh vpnkit 1500 - name: "Benchmark: Network (MTU=1500, network driver=lxc-user-nic)" run: | docker run --rm --privileged \ rootlesskit:test-integration ./benchmark-iperf3-net.sh lxc-user-nic 1500 - name: "Benchmark: Network (MTU=1500, rootful veth for comparison)" run: | docker run --rm --privileged \ rootlesskit:test-integration ./benchmark-iperf3-net.sh rootful_veth 1500 # ===== Benchmark: Network (MTU=65520) ===== - name: "Benchmark: Network (MTU=65520, network driver=slirp4netns)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 65520 - name: "Benchmark: Network (MTU=65520, network driver=slirp4netns with sandbox and seccomp)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-net.sh slirp4netns 65520 --slirp4netns-sandbox=auto --slirp4netns-seccomp=auto - name: "Benchmark: Network (MTU=65520, network driver=lxc-user-nic)" run: | docker run --rm --privileged \ rootlesskit:test-integration ./benchmark-iperf3-net.sh lxc-user-nic 65520 - name: "Benchmark: Network (MTU=65520, rootful veth for comparison)" run: | docker run --rm --privileged \ rootlesskit:test-integration ./benchmark-iperf3-net.sh rootful_veth 65520 # ===== Benchmark: TCP Ports ===== - name: "Benchmark: TCP Ports (port driver=slirp4netns)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-port.sh 
slirp4netns - name: "Benchmark: TCP Ports (port driver=builtin)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-port.sh builtin # ===== Benchmark: UDP Ports ===== - name: "Benchmark: UDP Ports (port driver=builtin)" run: | docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \ rootlesskit:test-integration ./benchmark-iperf3-port-udp.sh builtin test-integration-docker: name: "Integration test (Docker)" runs-on: ubuntu-latest steps: - name: "Check out" uses: actions/checkout@v2 - name: "Build integration test image" # Docker 20.10.x builds RootlessKit with Go 1.16 (as of Docker 20.10.10), so we use Go 1.16 as well here. run: DOCKER_BUILDKIT=1 docker build -t rootlesskit:test-integration-docker --target test-integration-docker --build-arg GO_VERSION=1.16 . - name: "Create a custom network to avoid IP confusion" run: docker network create custom - name: "Docker Integration test: net=slirp4netns, port-driver=builtin" run: | docker run -d --name test --network custom --privileged -e DOCKERD_ROOTLESS_ROOTLESSKIT_NET=slirp4netns -e DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=builtin rootlesskit:test-integration-docker sleep 2 docker exec test docker info docker exec test ./integration-docker.sh docker rm -f test - name: "Docker Integration test: net=slirp4netns, port-driver=slirp4netns" run: | docker run -d --name test --network custom --privileged -e DOCKERD_ROOTLESS_ROOTLESSKIT_NET=slirp4netns -e DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns rootlesskit:test-integration-docker sleep 2 docker exec test docker info docker exec test ./integration-docker.sh docker rm -f test - name: "Docker Integration test: net=vpnkit, port-driver=builtin" run: | docker run -d --name test --network custom --privileged -e DOCKERD_ROOTLESS_ROOTLESSKIT_NET=vpnkit -e DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=builtin 
rootlesskit:test-integration-docker sleep 2 docker exec test docker info docker exec test ./integration-docker.sh docker rm -f test rootlesskit-0.14.6/.github/workflows/release.yaml000066400000000000000000000044231417776672600221170ustar00rootroot00000000000000# Release guide (since v0.9.0): # 1. Bump up the version string to `vX.Y.Z` (or `vX.Y.Z-beta.W`) in `pkg/version/version.go`. # 2. `git commit -a -s -m vX.Y.Z` # 3. Bump up the version string to `vX.Y.Z+dev` (or `vX.Y.Z-beta.W`+dev) in `pkg/version/version.go`. # 4. `git commit -a -s -m vX.Y.Z+dev` # 5. Open a PR and merge it. # 6. Create a tag `v.X.Y.Z` for the `vX.Y.Z` commit, and push the tag to the upstream: `git push upstream vX.Y.Z` # 7. GitHub Actions automatically ships a draft release with a statically compiled binary: https://github.com/rootless-containers/rootlesskit/releases # If it fails, check the GitHub Actions log: https://github.com/rootless-containers/rootlesskit/actions?query=workflow%3ARelease # 8. Add release notes to the draft release and ship the release. name: Release on: push: tags: - 'v*' jobs: release: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: "Build binaries" run: DOCKER_BUILDKIT=1 docker build -o /tmp/artifact --target cross-artifact . - name: "SHA256SUMS" run: (cd /tmp/artifact; sha256sum *) | tee /tmp/SHA256SUMS - name: "The sha256sum of the SHA256SUMS file" run: sha256sum /tmp/SHA256SUMS - name: "Prepare the release note" run: | tag="${GITHUB_REF##*/}" shasha=$(sha256sum /tmp/SHA256SUMS | awk '{print $1}') cat << EOF | tee /tmp/release-note.txt ${tag} #### Changes (To be documented) #### Install \`\`\` mkdir -p ~/bin curl -sSL https://github.com/${{ github.repository }}/releases/download/${tag}/rootlesskit-\$(uname -m).tar.gz | tar Cxzv ~/bin \`\`\` #### About the binaries The binaries were built automatically on GitHub Actions. See the log to verify SHA256SUMS. 
https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} The sha256sum of the SHA256SUMS file itself is ${shasha} . EOF - name: "Create release" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | tag="${GITHUB_REF##*/}" asset_flags=() for f in /tmp/artifact/* /tmp/SHA256SUMS; do asset_flags+=("-a" "$f"); done hub release create "${asset_flags[@]}" -F /tmp/release-note.txt --draft "${tag}" rootlesskit-0.14.6/.gitignore000066400000000000000000000000201417776672600161730ustar00rootroot00000000000000bin/ _artifact/ rootlesskit-0.14.6/Dockerfile000066400000000000000000000100031417776672600161770ustar00rootroot00000000000000ARG GO_VERSION=1.17 ARG UBUNTU_VERSION=20.04 ARG SHADOW_VERSION=4.8.1 ARG SLIRP4NETNS_VERSION=v1.1.12 ARG VPNKIT_VERSION=0.5.0 ARG DOCKER_VERSION=20.10.10 FROM golang:${GO_VERSION}-alpine AS build RUN apk add --no-cache file make ADD . /go/src/github.com/rootless-containers/rootlesskit WORKDIR /go/src/github.com/rootless-containers/rootlesskit FROM build AS rootlesskit RUN CGO_ENABLED=0 make && file /bin/* | grep -v dynamic FROM scratch AS artifact COPY --from=rootlesskit /go/src/github.com/rootless-containers/rootlesskit/bin/* / FROM build AS cross RUN make cross FROM scratch AS cross-artifact COPY --from=cross /go/src/github.com/rootless-containers/rootlesskit/_artifact/* / # `go test -race` requires non-Alpine FROM golang:${GO_VERSION} AS test-unit RUN apt-get update && apt-get install -y iproute2 netcat-openbsd ADD . /go/src/github.com/rootless-containers/rootlesskit WORKDIR /go/src/github.com/rootless-containers/rootlesskit RUN go mod verify && go vet ./... 
CMD ["go","test","-v","-race","github.com/rootless-containers/rootlesskit/..."] # idmap runnable without --privileged (but still requires seccomp=unconfined apparmor=unconfined) FROM ubuntu:${UBUNTU_VERSION} AS idmap ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y automake autopoint bison gettext git gcc libcap-dev libtool make RUN git clone https://github.com/shadow-maint/shadow.git /shadow WORKDIR /shadow ARG SHADOW_VERSION RUN git pull && git checkout $SHADOW_VERSION RUN ./autogen.sh --disable-nls --disable-man --without-audit --without-selinux --without-acl --without-attr --without-tcb --without-nscd && \ make && \ cp src/newuidmap src/newgidmap /usr/bin FROM djs55/vpnkit:${VPNKIT_VERSION} AS vpnkit FROM ubuntu:${UBUNTU_VERSION} AS test-integration # iproute2: for `ip` command that rootlesskit needs to exec # liblxc-common and lxc-utils: for `lxc-user-nic` binary required for --net=lxc-user-nic # iperf3: only for benchmark purpose # busybox: only for debugging purpose # sudo: only for lxc-user-nic benchmark and rootful veth benchmark (for comparison) # libcap2-bin and curl: used by the RUN instructions in this Dockerfile. 
RUN apt-get update && apt-get install -y iproute2 liblxc-common lxc-utils iperf3 busybox sudo libcap2-bin curl COPY --from=idmap /usr/bin/newuidmap /usr/bin/newuidmap COPY --from=idmap /usr/bin/newgidmap /usr/bin/newgidmap RUN /sbin/setcap cap_setuid+eip /usr/bin/newuidmap && \ /sbin/setcap cap_setgid+eip /usr/bin/newgidmap && \ useradd --create-home --home-dir /home/user --uid 1000 user && \ mkdir -p /run/user/1000 /etc/lxc && \ echo "user veth lxcbr0 32" > /etc/lxc/lxc-usernet && \ echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user COPY --from=artifact /rootlesskit /home/user/bin/ COPY --from=artifact /rootlessctl /home/user/bin/ ARG SLIRP4NETNS_VERSION RUN curl -sSL -o /home/user/bin/slirp4netns https://github.com/rootless-containers/slirp4netns/releases/download/${SLIRP4NETNS_VERSION}/slirp4netns-x86_64 && \ chmod +x /home/user/bin/slirp4netns COPY --from=vpnkit /vpnkit /home/user/bin/vpnkit ADD ./hack /home/user/hack RUN chown -R user:user /run/user/1000 /home/user USER user ENV HOME /home/user ENV USER user ENV XDG_RUNTIME_DIR=/run/user/1000 ENV PATH /home/user/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ENV LD_LIBRARY_PATH=/home/user/lib WORKDIR /home/user/hack FROM test-integration AS test-integration-docker COPY --from=artifact /rootlesskit-docker-proxy /home/user/bin/ ARG DOCKER_VERSION RUN curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz | tar xz --strip-components=1 -C /home/user/bin/ RUN curl -fsSL -o /home/user/bin/dockerd-rootless.sh https://raw.githubusercontent.com/moby/moby/v${DOCKER_VERSION}/contrib/dockerd-rootless.sh && \ chmod +x /home/user/bin/dockerd-rootless.sh ENV DOCKERD_ROOTLESS_ROOTLESSKIT_NET=slirp4netns ENV DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=builtin ENV DOCKER_HOST=unix:///run/user/1000/docker.sock RUN mkdir -p /home/user/.local VOLUME /home/user/.local CMD ["dockerd-rootless.sh"] 
rootlesskit-0.14.6/LICENSE000066400000000000000000000261361417776672600152300ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rootlesskit-0.14.6/Makefile000066400000000000000000000016141417776672600156550ustar00rootroot00000000000000GO=go GO_FILES=$(shell find . 
-name *.go) BINARIES=rootlesskit rootlessctl rootlesskit-docker-proxy .PHONY: all all: $(addprefix bin/, $(BINARIES)) .PHONY: clean clean: $(RM) -r bin/ _artifact/ bin/rootlesskit: $(GO_FILES) $(GO) build -o $@ -v github.com/rootless-containers/rootlesskit/cmd/rootlesskit bin/rootlessctl: $(GO_FILES) $(GO) build -o $@ -v github.com/rootless-containers/rootlesskit/cmd/rootlessctl bin/rootlesskit-docker-proxy: $(GO_FILES) $(GO) build -o $@ -v github.com/rootless-containers/rootlesskit/cmd/rootlesskit-docker-proxy .PHONY: cross cross: ./hack/make-cross.sh BINDIR ?= /usr/local/bin .PHONY: install install: install -D -m 755 $(CURDIR)/bin/rootlesskit $(DESTDIR)$(BINDIR)/rootlesskit install -D -m 755 $(CURDIR)/bin/rootlessctl $(DESTDIR)$(BINDIR)/rootlessctl install -D -m 755 $(CURDIR)/bin/rootlesskit-docker-proxy $(DESTDIR)$(BINDIR)/rootlesskit-docker-proxy rootlesskit-0.14.6/README.md000066400000000000000000000254231417776672600155000ustar00rootroot00000000000000# RootlessKit: Linux-native fakeroot using user namespaces RootlessKit is a Linux-native implementation of "fake root" using [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html). The purpose of RootlessKit is to run [Docker and Kubernetes as an unprivileged user (known as "Rootless mode")](https://github.com/rootless-containers/usernetes), so as to protect the real root on the host from potential container-breakout attacks. 
- [What RootlessKit actually does](#what-rootlesskit-actually-does) - [Similar projects](#similar-projects) - [Projects using RootlessKit](#projects-using-rootlesskit) - [Setup](#setup) - [Requirements](#requirements) - [subuid](#subuid) - [sysctl](#sysctl) - [Usage](#usage) - [Full CLI options](#full-cli-options) - [State directory](#state-directory) - [Environment variables](#environment-variables) - [Additional documents](#additional-documents) ## What RootlessKit actually does RootlessKit creates [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) and [`mount_namespaces(7)`](http://man7.org/linux/man-pages/man7/mount_namespaces.7.html), and executes [`newuidmap(1)`](http://man7.org/linux/man-pages/man1/newuidmap.1.html)/[`newgidmap(1)`](http://man7.org/linux/man-pages/man1/newgidmap.1.html) along with [`subuid(5)`](http://man7.org/linux/man-pages/man5/subuid.5.html) and [`subgid(5)`](http://man7.org/linux/man-pages/man5/subgid.5.html). RootlessKit also supports isolating [`network_namespaces(7)`](http://man7.org/linux/man-pages/man7/network_namespaces.7.html) with userspace NAT using ["slirp"](./docs/network.md). Kernel-mode NAT using SUID-enabled [`lxc-user-nic(1)`](https://linuxcontainers.org/lxc/manpages/man1/lxc-user-nic.1.html) is also experimentally supported. 
## Similar projects Tools based on `LD_PRELOAD` (not enough to run rootless containers and yet lacks support for static binaries): * [`fakeroot`](https://wiki.debian.org/FakeRoot) Tools based on `ptrace(2)` (not enough to run rootless containers and yet slow): * [`fakeroot-ng`](https://fakeroot-ng.lingnu.com/) * [`proot`](https://proot-me.github.io/) Tools based on `user_namespaces(7)` (as in RootlessKit, but without support for `--copy-up`, `--net`, ...): * [`unshare -r`](http://man7.org/linux/man-pages/man1/unshare.1.html) * [`podman unshare`](https://github.com/containers/libpod/blob/master/docs/source/markdown/podman-unshare.1.md) * [`become-root`](https://github.com/giuseppe/become-root) ## Projects using RootlessKit Container engines: * [Docker/Moby](https://get.docker.com/rootless) * [Podman](https://podman.io/) (since Podman v1.8.0) * [nerdctl](https://github.com/containerd/nerdctl): Docker-compatible CLI for containerd Container image builders: * [BuildKit](https://github.com/moby/buildkit): Next-generation `docker build` backend Kubernetes distributions: * [Usernetes](https://github.com/rootless-containers/usernetes): Docker & Kubernetes, installable under a non-root user's `$HOME`. * [k3s](https://k3s.io/): Lightweight Kubernetes ## Setup ```console $ go get github.com/rootless-containers/rootlesskit/cmd/rootlesskit $ go get github.com/rootless-containers/rootlesskit/cmd/rootlessctl ``` or just run `make` to make binaries under `./bin` directory. ### Requirements ### subuid * `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package on most distributions. * `/etc/subuid` and `/etc/subgid` should contain more than 65536 sub-IDs. e.g. `penguin:231072:65536`. These files are automatically configured on most distributions. 
```console $ id -u 1001 $ whoami penguin $ grep "^$(whoami):" /etc/subuid penguin:231072:65536 $ grep "^$(whoami):" /etc/subgid penguin:231072:65536 ``` See also https://rootlesscontaine.rs/getting-started/common/subuid/ ### sysctl Some distros require setting up sysctl: - Debian (excluding Ubuntu) and Arch: `sudo sh -c "echo 1 > /proc/sys/kernel/unprivileged_userns_clone"` - RHEL/CentOS 7 (excluding RHEL/CentOS 8): `sudo sh -c "echo 28633 > /proc/sys/user/max_user_namespaces"` To persist sysctl configurations, edit `/etc/sysctl.conf` or add a file under `/etc/sysctl.d`. See also https://rootlesscontaine.rs/getting-started/common/sysctl/ ## Usage Inside `rootlesskit bash`, your UID is mapped to 0 but it is not the real root: ```console (host)$ rootlesskit bash (rootlesskit)# id uid=0(root) gid=0(root) groups=0(root),65534(nogroup) (rootlesskit)# ls -l /etc/shadow -rw-r----- 1 nobody nogroup 1050 Aug 21 19:02 /etc/shadow (rootlesskit)# cat /etc/shadow cat: /etc/shadow: Permission denied ``` Environment variables are kept untouched: ```console (host)$ rootlesskit bash (rootlesskit)# echo $USER penguin (rootlesskit)# echo $HOME /home/penguin (rootlesskit)# echo $XDG_RUNTIME_DIR /run/user/1001 ``` Filesystems can be isolated from the host with `--copy-up`: ```console (host)$ rootlesskit --copy-up=/etc bash (rootlesskit)# rm /etc/resolv.conf (rootlesskit)# vi /etc/resolv.conf ``` You can even create network namespaces with [Slirp](./docs/network.md): ```console (host)$ rootlesskit --copy-up=/etc --copy-up=/run --net=slirp4netns --disable-host-loopback bash (rootleesskit)# ip netns add foo ... ``` ## Full CLI options ```console $ rootlesskit --help NAME: rootlesskit - Linux-native fakeroot using user namespaces USAGE: rootlesskit [global options] [arguments...] VERSION: 0.14.0-beta.0 DESCRIPTION: RootlessKit is a Linux-native implementation of "fake root" using user_namespaces(7). 
Web site: https://github.com/rootless-containers/rootlesskit Examples: # spawn a shell with a new user namespace and a mount namespace rootlesskit bash # make /etc writable rootlesskit --copy-up=/etc bash # set mount propagation to rslave rootlesskit --propagation=rslave bash # create a network namespace with slirp4netns, and expose 80/tcp on the namespace as 8080/tcp on the host rootlesskit --copy-up=/etc --net=slirp4netns --disable-host-loopback --port-driver=builtin -p 127.0.0.1:8080:80/tcp bash Note: RootlessKit requires /etc/subuid and /etc/subgid to be configured by the real root user. See https://rootlesscontaine.rs/getting-started/common/ . OPTIONS: Misc: --debug debug mode (default: false) --help, -h show help (default: false) --version, -v print the version (default: false) Mount: --copy-up value mount a filesystem and copy-up the contents. e.g. "--copy-up=/etc" (typically required for non-host network) --copy-up-mode value copy-up mode [tmpfs+symlink] (default: "tmpfs+symlink") --propagation value mount propagation [rprivate, rslave] (default: "rprivate") Network: --net value network driver [host, slirp4netns, vpnkit, lxc-user-nic(experimental)] (default: "host") --mtu value MTU for non-host network (default: 65520 for slirp4netns, 1500 for others) (default: 0) --cidr value CIDR for slirp4netns network (default: 10.0.2.0/24) --ifname value Network interface name (default: tap0 for slirp4netns and vpnkit, eth0 for lxc-user-nic) --disable-host-loopback prohibit connecting to 127.0.0.1:* on the host namespace (default: false) Network [lxc-user-nic]: --lxc-user-nic-binary value path of lxc-user-nic binary for --net=lxc-user-nic (default: "/usr/lib/x86_64-linux-gnu/lxc/lxc-user-nic") --lxc-user-nic-bridge value lxc-user-nic bridge name (default: "lxcbr0") Network [slirp4netns]: --slirp4netns-binary value path of slirp4netns binary for --net=slirp4netns (default: "slirp4netns") --slirp4netns-sandbox value enable slirp4netns sandbox (experimental) [auto, true, 
false] (the default is planned to be "auto" in future) (default: "false") --slirp4netns-seccomp value enable slirp4netns seccomp (experimental) [auto, true, false] (the default is planned to be "auto" in future) (default: "false") Network [vpnkit]: --vpnkit-binary value path of VPNKit binary for --net=vpnkit (default: "vpnkit") Port: --port-driver value port driver for non-host network. [none, builtin, slirp4netns] (default: "none") --publish value, -p value publish ports. e.g. "127.0.0.1:8080:80/tcp" Process: --pidns create a PID namespace (default: false) --cgroupns create a cgroup namespace (default: false) --utsns create a UTS namespace (default: false) --ipcns create an IPC namespace (default: false) --reaper value enable process reaper. Requires --pidns. [auto,true,false] (default: "auto") --evacuate-cgroup2 value evacuate processes into the specified subgroup. Requires --pidns and --cgroupns State: --state-dir value state directory ``` ## State directory The following files will be created in the state directory, which can be specified with `--state-dir`: * `lock`: lock file * `child_pid`: decimal PID text that can be used for `nsenter(1)`. * `api.sock`: REST API socket. See [`./docs/api.md`](./docs/api.md) and [`./docs/port.md`](./docs/port.md). If `--state-dir` is not specified, RootlessKit creates a temporary state directory on `/tmp` and removes it on exit. Undocumented files are subject to change. ## Environment variables The following environment variables will be set for the child process: * `ROOTLESSKIT_STATE_DIR` (since v0.3.0): absolute path to the state dir * `ROOTLESSKIT_PARENT_EUID` (since v0.8.0): effective UID * `ROOTLESSKIT_PARENT_EGID` (since v0.8.0): effective GID Undocumented environment variables are subject to change. ## Additional documents - [`./docs/network.md`](./docs/network.md): Networking (`--net`, `--mtu`, `--cidr`, `--disable-host-loopback`, `--slirp4netns-*`, ...) 
- [`./docs/port.md`](./docs/port.md): Port forwarding (`--port-driver`, `-p`, ...) - [`./docs/mount.md`](./docs/mount.md): Mount (`--propagation`, ...) - [`./docs/process.md`](./docs/process.md): Process (`--pidns`, `--reaper`, `--cgroupns`, `--evacuate-cgroup2`, ...) - [`./docs/api.md`](./docs/api.md): REST API rootlesskit-0.14.6/cmd/000077500000000000000000000000001417776672600147565ustar00rootroot00000000000000rootlesskit-0.14.6/cmd/rootlessctl/000077500000000000000000000000001417776672600173335ustar00rootroot00000000000000rootlesskit-0.14.6/cmd/rootlessctl/info.go000066400000000000000000000025511417776672600206200ustar00rootroot00000000000000package main import ( "context" "encoding/json" "fmt" "github.com/urfave/cli/v2" ) var infoCommand = cli.Command{ Name: "info", Usage: "Show info", ArgsUsage: "[flags]", Flags: []cli.Flag{ &cli.BoolFlag{ Name: "json", Usage: "Prints as JSON", }, }, Action: infoAction, } func infoAction(clicontext *cli.Context) error { w := clicontext.App.Writer c, err := newClient(clicontext) if err != nil { return err } ctx := context.Background() info, err := c.Info(ctx) if err != nil { return err } if clicontext.Bool("json") { m, err := json.MarshalIndent(info, "", " ") if err != nil { return err } fmt.Fprintln(w, string(m)) return nil } fmt.Fprintf(w, "- REST API version: %s\n", info.APIVersion) fmt.Fprintf(w, "- Implementation version: %s\n", info.Version) fmt.Fprintf(w, "- State Directory: %s\n", info.StateDir) fmt.Fprintf(w, "- Child PID: %d\n", info.ChildPID) if info.NetworkDriver != nil { fmt.Fprintf(w, "- Network Driver: %s\n", info.NetworkDriver.Driver) fmt.Fprintf(w, " - DNS: %v\n", info.NetworkDriver.DNS) if info.NetworkDriver.ChildIP != nil { fmt.Fprintf(w, " - IP: %v\n", info.NetworkDriver.ChildIP) } } if info.PortDriver != nil { fmt.Fprintf(w, "- Port Driver: %s\n", info.PortDriver.Driver) fmt.Fprintf(w, " - Supported protocols: %v\n", info.PortDriver.Protos) } return nil } 
rootlesskit-0.14.6/cmd/rootlessctl/main.go000066400000000000000000000027151417776672600206130ustar00rootroot00000000000000package main import ( "errors" "fmt" "os" "path/filepath" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "github.com/rootless-containers/rootlesskit/pkg/api/client" "github.com/rootless-containers/rootlesskit/pkg/version" ) func main() { debug := false app := cli.NewApp() app.Name = "rootlessctl" app.Version = version.Version app.Usage = "RootlessKit API client" app.Flags = []cli.Flag{ &cli.BoolFlag{ Name: "debug", Usage: "debug mode", Destination: &debug, }, &cli.StringFlag{ Name: "socket", Usage: "Path to api.sock (under the \"rootlesskit --state-dir\" directory), defaults to $ROOTLESSKIT_STATE_DIR/api.sock", }, } app.Commands = []*cli.Command{ &listPortsCommand, &addPortsCommand, &removePortsCommand, &infoCommand, } app.Before = func(clicontext *cli.Context) error { if debug { logrus.SetLevel(logrus.DebugLevel) } return nil } if err := app.Run(os.Args); err != nil { if debug { fmt.Fprintf(os.Stderr, "error: %+v\n", err) } else { fmt.Fprintf(os.Stderr, "error: %v\n", err) } os.Exit(1) } } func newClient(clicontext *cli.Context) (client.Client, error) { socketPath := clicontext.String("socket") if socketPath == "" { stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR") if stateDir == "" { return nil, errors.New("please specify --socket or set $ROOTLESSKIT_STATE_DIR") } socketPath = filepath.Join(stateDir, "api.sock") } return client.New(socketPath) } rootlesskit-0.14.6/cmd/rootlessctl/port.go000066400000000000000000000063301417776672600206500ustar00rootroot00000000000000package main import ( "context" "encoding/json" "errors" "fmt" "os" "strconv" "text/tabwriter" "github.com/urfave/cli/v2" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/portutil" ) var listPortsCommand = cli.Command{ Name: "list-ports", Usage: "List ports", ArgsUsage: "[flags]", Flags: []cli.Flag{ &cli.BoolFlag{ Name: 
"json", Usage: "Prints as JSON", }, }, Action: listPortsAction, } func listPortsAction(clicontext *cli.Context) error { c, err := newClient(clicontext) if err != nil { return err } pm := c.PortManager() ctx := context.Background() portStatuses, err := pm.ListPorts(ctx) if err != nil { return err } if clicontext.Bool("json") { // Marshal per entry, for consistency with add-ports // (and for potential streaming support) for _, p := range portStatuses { m, err := json.Marshal(p) if err != nil { return err } fmt.Println(string(m)) } return nil } w := tabwriter.NewWriter(os.Stdout, 4, 8, 4, ' ', 0) if _, err := fmt.Fprintln(w, "ID\tPROTO\tPARENTIP\tPARENTPORT\tCHILDIP\tCHILDPORT\t"); err != nil { return err } for _, p := range portStatuses { if _, err := fmt.Fprintf(w, "%d\t%s\t%s\t%d\t%s\t%d\t\n", p.ID, p.Spec.Proto, p.Spec.ParentIP, p.Spec.ParentPort, p.Spec.ChildIP, p.Spec.ChildPort); err != nil { return err } } return w.Flush() } var addPortsCommand = cli.Command{ Name: "add-ports", Usage: "Add ports", ArgsUsage: "[flags] PARENTIP:PARENTPORT:CHILDPORT/PROTO [PARENTIP:PARENTPORT:CHILDPORT/PROTO...]", Description: "Add exposed ports. The port spec is similar to `docker run -p`. e.g. 
\"127.0.0.1:8080:80/tcp\".", Flags: []cli.Flag{ &cli.BoolFlag{ Name: "json", Usage: "Prints as JSON", }, }, Action: addPortsAction, } func addPortsAction(clicontext *cli.Context) error { if clicontext.NArg() < 1 { return errors.New("no port specified") } var portSpecs []port.Spec for _, s := range clicontext.Args().Slice() { sp, err := portutil.ParsePortSpec(s) if err != nil { return err } portSpecs = append(portSpecs, *sp) } c, err := newClient(clicontext) if err != nil { return err } pm := c.PortManager() ctx := context.Background() for _, sp := range portSpecs { portStatus, err := pm.AddPort(ctx, sp) if err != nil { return err } if clicontext.Bool("json") { m, err := json.Marshal(portStatus) if err != nil { return err } fmt.Println(string(m)) } else { fmt.Printf("%d\n", portStatus.ID) } } return nil } var removePortsCommand = cli.Command{ Name: "remove-ports", Usage: "Remove ports", ArgsUsage: "[flags] ID [ID...]", Action: removePortsAction, } func removePortsAction(clicontext *cli.Context) error { if clicontext.NArg() < 1 { return errors.New("no ID specified") } var ids []int for _, s := range clicontext.Args().Slice() { id, err := strconv.Atoi(s) if err != nil { return err } ids = append(ids, id) } c, err := newClient(clicontext) if err != nil { return err } pm := c.PortManager() ctx := context.Background() for _, id := range ids { if err := pm.RemovePort(ctx, id); err != nil { return err } fmt.Printf("%d\n", id) } return nil } rootlesskit-0.14.6/cmd/rootlesskit-docker-proxy/000077500000000000000000000000001417776672600217645ustar00rootroot00000000000000rootlesskit-0.14.6/cmd/rootlesskit-docker-proxy/main.go000066400000000000000000000135211417776672600232410ustar00rootroot00000000000000package main import ( "context" "errors" "flag" "fmt" "log" "net" "os" "os/exec" "os/signal" "path/filepath" "strconv" "strings" "syscall" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/api/client" 
"github.com/rootless-containers/rootlesskit/pkg/port" "github.com/sirupsen/logrus" ) const ( realProxy = "docker-proxy" ) // drop-in replacement for docker-proxy. // needs to be executed in the child namespace. func main() { f := os.NewFile(3, "signal-parent") defer f.Close() if err := xmain(f); err != nil { // success: "0\n" (written by realProxy) // error: "1\n" (written by either rootlesskit-docker-proxy or realProxy) fmt.Fprintf(f, "1\n%s", err) log.Fatal(err) } } func isIPv6(ipStr string) bool { ip := net.ParseIP(ipStr) if ip == nil { return false } return ip.To4() == nil } func getPortDriverProtos(info *api.Info) (string, map[string]struct{}, error) { if info.PortDriver == nil { return "", nil, errors.New("no port driver is available") } m := make(map[string]struct{}, len(info.PortDriver.Protos)) for _, p := range info.PortDriver.Protos { m[p] = struct{}{} } return info.PortDriver.Driver, m, nil } type protocolUnsupportedError struct { apiProto string portDriverName string hostIP string hostPort int } func (e *protocolUnsupportedError) Error() string { return fmt.Sprintf("protocol %q is not supported by the RootlessKit port driver %q, discarding request for %q", e.apiProto, e.portDriverName, net.JoinHostPort(e.hostIP, strconv.Itoa(e.hostPort))) } func callRootlessKitAPI(c client.Client, info *api.Info, hostIP string, hostPort int, dockerProxyProto, childIP string) (func() error, error) { // dockerProxyProto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly // for libnetwork >= 20201216 // // See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20 // See also https://github.com/rootless-containers/rootlesskit/issues/231 apiProto := dockerProxyProto if !strings.HasSuffix(apiProto, "4") && !strings.HasSuffix(apiProto, "6") { if isIPv6(hostIP) { apiProto += "6" } else { apiProto += "4" } } portDriverName, apiProtos, err := getPortDriverProtos(info) if err != nil { return 
nil, err } if _, ok := apiProtos[apiProto]; !ok { // This happens when apiProto="tcp6", portDriverName="slirp4netns", // because "slirp4netns" port driver does not support listening on IPv6 yet. // // Note that "slirp4netns" port driver is not used by default, // even when network driver is set to "slirp4netns". // // Most users are using "builtin" port driver and will not see this warning. err := &protocolUnsupportedError{ apiProto: apiProto, portDriverName: portDriverName, hostIP: hostIP, hostPort: hostPort, } return nil, err } pm := c.PortManager() p := port.Spec{ Proto: apiProto, ParentIP: hostIP, ParentPort: hostPort, ChildIP: childIP, ChildPort: hostPort, } st, err := pm.AddPort(context.Background(), p) if err != nil { return nil, fmt.Errorf("error while calling PortManager.AddPort(): %w", err) } deferFunc := func() error { if dErr := pm.RemovePort(context.Background(), st.ID); dErr != nil { return fmt.Errorf("error while calling PortManager.RemovePort(): %w", err) } return nil } return deferFunc, nil } func xmain(f *os.File) error { containerIP := flag.String("container-ip", "", "container ip") containerPort := flag.Int("container-port", -1, "container port") hostIP := flag.String("host-ip", "", "host ip") hostPort := flag.Int("host-port", -1, "host port") proto := flag.String("proto", "tcp", "proxy protocol") flag.Parse() stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR") if stateDir == "" { return errors.New("$ROOTLESSKIT_STATE_DIR needs to be set") } socketPath := filepath.Join(stateDir, "api.sock") c, err := client.New(socketPath) if err != nil { return fmt.Errorf("error while connecting to RootlessKit API socket: %w", err) } info, err := c.Info(context.Background()) if err != nil { return fmt.Errorf("failed to call info API, probably RootlessKit binary is too old (needs to be v0.14.0 or later): %w", err) } // use loopback IP as the child IP, when port-driver="builtin" childIP := "127.0.0.1" if isIPv6(*hostIP) { childIP = "::1" } if 
info.PortDriver.DisallowLoopbackChildIP { // i.e., port-driver="slirp4netns" if info.NetworkDriver.ChildIP == nil { return fmt.Errorf("port driver (%q) does not allow loopback child IP, but network driver (%q) has no non-loopback IP", info.PortDriver.Driver, info.NetworkDriver.Driver) } childIP = info.NetworkDriver.ChildIP.String() } deferFunc, err := callRootlessKitAPI(c, info, *hostIP, *hostPort, *proto, childIP) if deferFunc != nil { defer func() { if dErr := deferFunc(); dErr != nil { logrus.Warn(dErr) } }() } if err != nil { if _, ok := err.(*protocolUnsupportedError); ok { logrus.Warn(err) // exit without executing realProxy (https://github.com/rootless-containers/rootlesskit/issues/250) fmt.Fprint(f, "0\n") return nil } return err } cmd := exec.Command(realProxy, "-container-ip", *containerIP, "-container-port", strconv.Itoa(*containerPort), "-host-ip", childIP, "-host-port", strconv.Itoa(*hostPort), "-proto", *proto) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Env = os.Environ() cmd.ExtraFiles = append(cmd.ExtraFiles, f) cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } if err := cmd.Start(); err != nil { return fmt.Errorf("error while starting %s: %w", realProxy, err) } ch := make(chan os.Signal, 1) signal.Notify(ch, os.Interrupt) <-ch if err := cmd.Process.Kill(); err != nil { return fmt.Errorf("error while killing %s: %w", realProxy, err) } return nil } rootlesskit-0.14.6/cmd/rootlesskit/000077500000000000000000000000001417776672600173405ustar00rootroot00000000000000rootlesskit-0.14.6/cmd/rootlesskit/category.go000066400000000000000000000025521417776672600215100ustar00rootroot00000000000000package main import ( "fmt" "sort" "github.com/urfave/cli/v2" ) const ( CategoryState = "State" CategoryNetwork = "Network" CategorySlirp4netns = "Network [slirp4netns]" CategoryVPNKit = "Network [vpnkit]" CategoryLXCUserNic = "Network [lxc-user-nic]" CategoryPort = "Port" CategoryMount = "Mount" CategoryProcess = "Process" CategoryMisc = 
"Misc" ) type CategorizedFlag interface { cli.Flag Category() string } func Categorize(f cli.Flag, category string) CategorizedFlag { return &flag{ Flag: f, category: category, } } type flag struct { cli.Flag category string } func (f *flag) Category() string { return f.category } func formatFlags(flags []cli.Flag) string { var res string m := make(map[string][]cli.Flag) for _, f := range flags { cat := "(Uncategorized)" if x, ok := f.(CategorizedFlag); ok { if cat2 := x.Category(); cat2 != "" { cat = cat2 } } if _, ok := m[cat]; !ok { m[cat] = make([]cli.Flag, 0) } m[cat] = append(m[cat], f) } var catList []string for c := range m { catList = append(catList, c) } sort.Strings(catList) for _, cat := range catList { catFlags, ok := m[cat] if !ok { continue } res += fmt.Sprintf(" %s:\t\n", cat) for _, f := range catFlags { res += fmt.Sprintf(" %s\n", f.String()) } res += " \t\n" } return res } rootlesskit-0.14.6/cmd/rootlesskit/main.go000066400000000000000000000426151417776672600206230ustar00rootroot00000000000000package main import ( "errors" "fmt" "net" "os" "os/exec" "path/filepath" "strings" "syscall" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "github.com/rootless-containers/rootlesskit/pkg/child" "github.com/rootless-containers/rootlesskit/pkg/common" "github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink" "github.com/rootless-containers/rootlesskit/pkg/network/lxcusernic" "github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns" "github.com/rootless-containers/rootlesskit/pkg/network/vpnkit" "github.com/rootless-containers/rootlesskit/pkg/parent" "github.com/rootless-containers/rootlesskit/pkg/port/builtin" "github.com/rootless-containers/rootlesskit/pkg/port/portutil" slirp4netns_port "github.com/rootless-containers/rootlesskit/pkg/port/slirp4netns" "github.com/rootless-containers/rootlesskit/pkg/version" ) func main() { const ( pipeFDEnvKey = "_ROOTLESSKIT_PIPEFD_UNDOCUMENTED" stateDirEnvKey = "ROOTLESSKIT_STATE_DIR" // 
documented parentEUIDEnvKey = "ROOTLESSKIT_PARENT_EUID" // documented parentEGIDEnvKey = "ROOTLESSKIT_PARENT_EGID" // documented ) iAmChild := os.Getenv(pipeFDEnvKey) != "" debug := false app := cli.NewApp() app.Name = "rootlesskit" app.Version = version.Version app.HideHelpCommand = true app.Usage = "Linux-native fakeroot using user namespaces" app.UsageText = "rootlesskit [global options] [arguments...]" app.Description = `RootlessKit is a Linux-native implementation of "fake root" using user_namespaces(7). Web site: https://github.com/rootless-containers/rootlesskit Examples: # spawn a shell with a new user namespace and a mount namespace rootlesskit bash # make /etc writable rootlesskit --copy-up=/etc bash # set mount propagation to rslave rootlesskit --propagation=rslave bash # create a network namespace with slirp4netns, and expose 80/tcp on the namespace as 8080/tcp on the host rootlesskit --copy-up=/etc --net=slirp4netns --disable-host-loopback --port-driver=builtin -p 127.0.0.1:8080:80/tcp bash Note: RootlessKit requires /etc/subuid and /etc/subgid to be configured by the real root user. See https://rootlesscontaine.rs/getting-started/common/ . 
` app.Flags = []cli.Flag{ Categorize(&cli.BoolFlag{ Name: "debug", Usage: "debug mode", Destination: &debug, }, CategoryMisc), Categorize(&cli.StringFlag{ Name: "state-dir", Usage: "state directory", }, CategoryState), Categorize(&cli.StringFlag{ Name: "net", Usage: "network driver [host, slirp4netns, vpnkit, lxc-user-nic(experimental)]", Value: "host", }, CategoryNetwork), Categorize(&cli.StringFlag{ Name: "slirp4netns-binary", Usage: "path of slirp4netns binary for --net=slirp4netns", Value: "slirp4netns", }, CategorySlirp4netns), Categorize(&cli.StringFlag{ Name: "slirp4netns-sandbox", Usage: "enable slirp4netns sandbox (experimental) [auto, true, false] (the default is planned to be \"auto\" in future)", Value: "false", }, CategorySlirp4netns), Categorize(&cli.StringFlag{ Name: "slirp4netns-seccomp", Usage: "enable slirp4netns seccomp (experimental) [auto, true, false] (the default is planned to be \"auto\" in future)", Value: "false", }, CategorySlirp4netns), Categorize(&cli.StringFlag{ Name: "vpnkit-binary", Usage: "path of VPNKit binary for --net=vpnkit", Value: "vpnkit", }, CategoryVPNKit), Categorize(&cli.StringFlag{ Name: "lxc-user-nic-binary", Usage: "path of lxc-user-nic binary for --net=lxc-user-nic", Value: lxcUserNicBin(), }, CategoryLXCUserNic), Categorize(&cli.StringFlag{ Name: "lxc-user-nic-bridge", Usage: "lxc-user-nic bridge name", Value: "lxcbr0", }, CategoryLXCUserNic), Categorize(&cli.IntFlag{ Name: "mtu", Usage: "MTU for non-host network (default: 65520 for slirp4netns, 1500 for others)", Value: 0, // resolved into 65520 for slirp4netns, 1500 for others }, CategoryNetwork), Categorize(&cli.StringFlag{ Name: "cidr", Usage: "CIDR for slirp4netns network (default: 10.0.2.0/24)", }, CategoryNetwork), Categorize(&cli.StringFlag{ Name: "ifname", Usage: "Network interface name (default: tap0 for slirp4netns and vpnkit, eth0 for lxc-user-nic)", }, CategoryNetwork), Categorize(&cli.BoolFlag{ Name: "disable-host-loopback", Usage: "prohibit connecting 
to 127.0.0.1:* on the host namespace", }, CategoryNetwork), Categorize(&cli.BoolFlag{ Name: "ipv6", Usage: "enable IPv6 routing. Unrelated to port forwarding. Only supported for slirp4netns. (experimental)", }, CategoryNetwork), Categorize(&cli.StringSliceFlag{ Name: "copy-up", Usage: "mount a filesystem and copy-up the contents. e.g. \"--copy-up=/etc\" (typically required for non-host network)", }, CategoryMount), Categorize(&cli.StringFlag{ Name: "copy-up-mode", Usage: "copy-up mode [tmpfs+symlink]", Value: "tmpfs+symlink", }, CategoryMount), Categorize(&cli.StringFlag{ Name: "port-driver", Usage: "port driver for non-host network. [none, builtin, slirp4netns]", Value: "none", }, CategoryPort), Categorize(&cli.StringSliceFlag{ Name: "publish", Aliases: []string{"p"}, Usage: "publish ports. e.g. \"127.0.0.1:8080:80/tcp\"", }, CategoryPort), Categorize(&cli.BoolFlag{ Name: "pidns", Usage: "create a PID namespace", }, CategoryProcess), Categorize(&cli.BoolFlag{ Name: "cgroupns", Usage: "create a cgroup namespace", }, CategoryProcess), Categorize(&cli.BoolFlag{ Name: "utsns", Usage: "create a UTS namespace", }, CategoryProcess), Categorize(&cli.BoolFlag{ Name: "ipcns", Usage: "create an IPC namespace", }, CategoryProcess), Categorize(&cli.StringFlag{ Name: "propagation", Usage: "mount propagation [rprivate, rslave]", Value: "rprivate", }, CategoryMount), Categorize(&cli.StringFlag{ Name: "reaper", Usage: "enable process reaper. Requires --pidns. [auto,true,false]", Value: "auto", }, CategoryProcess), Categorize(&cli.StringFlag{ Name: "evacuate-cgroup2", Usage: "evacuate processes into the specified subgroup. 
// parseCIDR parses s as a CIDR network such as "10.0.2.0/24".
//
// An empty s means "not specified" and yields (nil, nil). The address part
// must be the network address itself: host bits such as "10.0.2.100/24" are
// rejected, and the error names both the rejected value and the network
// address that was expected.
func parseCIDR(s string) (*net.IPNet, error) {
	if s == "" {
		return nil, nil
	}
	ip, ipnet, err := net.ParseCIDR(s)
	if err != nil {
		return nil, err
	}
	if !ip.Equal(ipnet.IP) {
		return nil, fmt.Errorf("cidr must be like 10.0.2.0/24, not like 10.0.2.100/24 (got %q, expected %q)", s, ipnet.String())
	}
	return ipnet, nil
}
opt := parent.Opt{ PipeFDEnvKey: pipeFDEnvKey, StateDirEnvKey: stateDirEnvKey, CreatePIDNS: clicontext.Bool("pidns"), CreateCgroupNS: clicontext.Bool("cgroupns"), CreateUTSNS: clicontext.Bool("utsns"), CreateIPCNS: clicontext.Bool("ipcns"), ParentEUIDEnvKey: parentEUIDEnvKey, ParentEGIDEnvKey: parentEGIDEnvKey, Propagation: clicontext.String("propagation"), EvacuateCgroup2: clicontext.String("evacuate-cgroup2"), } if opt.EvacuateCgroup2 != "" { if !opt.CreateCgroupNS { return opt, errors.New("evacuate-cgroup2 requires --cgroupns") } if !opt.CreatePIDNS { return opt, errors.New("evacuate-cgroup2 requires --pidns") } } opt.StateDir = clicontext.String("state-dir") if opt.StateDir == "" { opt.StateDir, err = os.MkdirTemp("", "rootlesskit") if err != nil { return opt, fmt.Errorf("creating a state directory: %w", err) } } else { opt.StateDir, err = filepath.Abs(opt.StateDir) if err != nil { return opt, err } if err := parent.InitStateDir(opt.StateDir); err != nil { return opt, err } } mtu := clicontext.Int("mtu") if mtu < 0 || mtu > 65521 { // 0 is ok (stands for the driver's default) return opt, fmt.Errorf("mtu must be <= 65521, got %d", mtu) } ipnet, err := parseCIDR(clicontext.String("cidr")) if err != nil { return opt, err } ifname := clicontext.String("ifname") if strings.Contains(ifname, "/") { return opt, errors.New("ifname must not contain \"/\"") } ipv6 := clicontext.Bool("ipv6") if ipv6 { logrus.Warn("ipv6 is experimental") if s := clicontext.String("net"); s != "slirp4netns" { logrus.Warnf("--ipv6 is discarded for --net=%s", s) } } disableHostLoopback := clicontext.Bool("disable-host-loopback") if !disableHostLoopback && clicontext.String("net") != "host" { logrus.Warn("specifying --disable-host-loopback is highly recommended to prohibit connecting to 127.0.0.1:* on the host namespace (requires slirp4netns or VPNKit)") } slirp4netnsAPISocketPath := "" if clicontext.String("port-driver") == "slirp4netns" { slirp4netnsAPISocketPath = filepath.Join(opt.StateDir, 
".s4nn.sock") } switch s := clicontext.String("net"); s { case "host": // NOP if mtu != 0 { logrus.Warnf("unsupported mtu for --net=host: %d", mtu) } if ipnet != nil { return opt, errors.New("custom cidr is supported only for --net=slirp4netns") } if ifname != "" { return opt, errors.New("ifname cannot be specified for --net=host") } case "slirp4netns": binary := clicontext.String("slirp4netns-binary") if _, err := exec.LookPath(binary); err != nil { return opt, err } features, err := slirp4netns.DetectFeatures(binary) if err != nil { return opt, err } logrus.Debugf("slirp4netns features %+v", features) if disableHostLoopback && !features.SupportsDisableHostLoopback { // NOTREACHED return opt, errors.New("unsupported slirp4netns version: lacks SupportsDisableHostLoopback") } if slirp4netnsAPISocketPath != "" && !features.SupportsAPISocket { // NOTREACHED return opt, errors.New("unsupported slirp4netns version: lacks SupportsAPISocket") } enableSandbox := false switch s := clicontext.String("slirp4netns-sandbox"); s { case "auto": // this might not work when /etc/resolv.conf is a symlink to a file outside /etc or /run // https://github.com/rootless-containers/slirp4netns/issues/116 enableSandbox = features.SupportsEnableSandbox case "true": enableSandbox = true if !features.SupportsEnableSandbox { // NOTREACHED return opt, errors.New("unsupported slirp4netns version: lacks SupportsEnableSandbox") } case "false", "": // default // NOP default: return opt, fmt.Errorf("unsupported slirp4netns-sandbox mode: %q", s) } enableSeccomp := false switch s := clicontext.String("slirp4netns-seccomp"); s { case "auto": enableSeccomp = features.SupportsEnableSeccomp && features.KernelSupportsEnableSeccomp case "true": enableSeccomp = true if !features.SupportsEnableSeccomp { return opt, errors.New("unsupported slirp4netns version: lacks SupportsEnableSeccomp") } if !features.KernelSupportsEnableSeccomp { return opt, errors.New("kernel doesn't support seccomp") } case "false", "": 
// default // NOP default: return opt, fmt.Errorf("unsupported slirp4netns-seccomp mode: %q", s) } opt.NetworkDriver, err = slirp4netns.NewParentDriver(&logrusDebugWriter{label: "network/slirp4netns"}, binary, mtu, ipnet, ifname, disableHostLoopback, slirp4netnsAPISocketPath, enableSandbox, enableSeccomp, ipv6) if err != nil { return opt, err } case "vpnkit": if ipnet != nil { return opt, errors.New("custom cidr is supported only for --net=slirp4netns") } binary := clicontext.String("vpnkit-binary") if _, err := exec.LookPath(binary); err != nil { return opt, err } opt.NetworkDriver = vpnkit.NewParentDriver(binary, mtu, ifname, disableHostLoopback) case "lxc-user-nic": logrus.Warn("\"lxc-user-nic\" network driver is experimental") if ipnet != nil { return opt, errors.New("custom cidr is supported only for --net=slirp4netns") } if !disableHostLoopback { logrus.Warn("--disable-host-loopback is implicitly set for lxc-user-nic") } binary := clicontext.String("lxc-user-nic-binary") if _, err := exec.LookPath(binary); err != nil { return opt, err } opt.NetworkDriver, err = lxcusernic.NewParentDriver(binary, mtu, clicontext.String("lxc-user-nic-bridge"), ifname) if err != nil { return opt, err } default: return opt, fmt.Errorf("unknown network mode: %s", s) } switch s := clicontext.String("port-driver"); s { case "none": // NOP if len(clicontext.StringSlice("publish")) != 0 { return opt, fmt.Errorf("port driver %q does not support publishing ports", s) } case "slirp4netns": if clicontext.String("net") != "slirp4netns" { return opt, errors.New("port driver requires slirp4netns network") } opt.PortDriver, err = slirp4netns_port.NewParentDriver(&logrusDebugWriter{label: "port/slirp4netns"}, slirp4netnsAPISocketPath) if err != nil { return opt, err } case "builtin": if opt.NetworkDriver == nil { return opt, errors.New("port driver requires non-host network") } opt.PortDriver, err = builtin.NewParentDriver(&logrusDebugWriter{label: "port/builtin"}, opt.StateDir) if err != nil 
{ return opt, err } default: return opt, fmt.Errorf("unknown port driver: %s", s) } for _, s := range clicontext.StringSlice("publish") { spec, err := portutil.ParsePortSpec(s) if err != nil { return opt, err } if err := portutil.ValidatePortSpec(*spec, nil); err != nil { return opt, err } opt.PublishPorts = append(opt.PublishPorts, *spec) } return opt, nil } type logrusDebugWriter struct { label string } func (w *logrusDebugWriter) Write(p []byte) (int, error) { s := strings.TrimSuffix(string(p), "\n") if w.label != "" { s = w.label + ": " + s } logrus.Debug(s) return len(p), nil } func createChildOpt(clicontext *cli.Context, pipeFDEnvKey string, targetCmd []string) (child.Opt, error) { pidns := clicontext.Bool("pidns") opt := child.Opt{ PipeFDEnvKey: pipeFDEnvKey, TargetCmd: targetCmd, MountProcfs: pidns, Propagation: clicontext.String("propagation"), EvacuateCgroup2: clicontext.String("evacuate-cgroup2") != "", } switch reaperStr := clicontext.String("reaper"); reaperStr { case "auto": opt.Reaper = pidns logrus.Debugf("reaper: auto chosen value: %v", opt.Reaper) case "true": if !pidns { return opt, errors.New("reaper requires --pidns") } opt.Reaper = true case "false": default: return opt, fmt.Errorf("unknown reaper mode: %s", reaperStr) } switch s := clicontext.String("net"); s { case "host": // NOP case "slirp4netns": opt.NetworkDriver = slirp4netns.NewChildDriver() case "vpnkit": opt.NetworkDriver = vpnkit.NewChildDriver() case "lxc-user-nic": opt.NetworkDriver = lxcusernic.NewChildDriver() default: return opt, fmt.Errorf("unknown network mode: %s", s) } opt.CopyUpDirs = clicontext.StringSlice("copy-up") switch s := clicontext.String("copy-up-mode"); s { case "tmpfs+symlink": opt.CopyUpDriver = tmpfssymlink.NewChildDriver() if len(opt.CopyUpDirs) != 0 && (opt.Propagation == "rshared" || opt.Propagation == "shared") { return opt, fmt.Errorf("propagation %s does not support copy-up driver %s", opt.Propagation, s) } default: return opt, fmt.Errorf("unknown 
// lxcUserNicBin returns the path of the first lxc-user-nic binary that exists
// in the well-known distribution-specific locations, or "" when none of them
// can be stat'ed.
func lxcUserNicBin() string {
	// unameM is evaluated while building the list, i.e. even when the first
	// candidate already exists.
	candidates := []string{
		"/usr/libexec/lxc/lxc-user-nic",                        // Debian, Fedora
		"/usr/lib/" + unameM() + "-linux-gnu/lxc/lxc-user-nic", // Ubuntu
		"/usr/lib/lxc/lxc-user-nic",                            // Arch Linux
	}
	for _, path := range candidates {
		if _, err := os.Stat(path); err == nil {
			return path
		}
	}
	return ""
}

// unameM returns the machine hardware name reported by the uname system call
// (the Machine field of syscall.Utsname). It panics if the system call fails.
func unameM() string {
	var utsname syscall.Utsname
	if err := syscall.Uname(&utsname); err != nil {
		panic(err)
	}
	// Machine is a fixed-size, NUL-terminated C string. Stop at the terminator
	// rather than merely skipping zero bytes, so that nothing located after
	// the end of the string can leak into the result.
	machine := make([]byte, 0, len(utsname.Machine))
	for _, c := range utsname.Machine {
		if c == 0 {
			break
		}
		// The element type is int8 or uint8 depending on the architecture;
		// the conversion to byte covers both.
		machine = append(machine, byte(c))
	}
	return string(machine)
}
VERSION: 0.14.0-beta.0 COMMANDS: list-ports List ports add-ports Add ports remove-ports Remove ports info Show info help, h Shows a list of commands or help for one command GLOBAL OPTIONS: --debug debug mode (default: false) --socket value Path to api.sock (under the "rootlesskit --state-dir" directory), defaults to $ROOTLESSKIT_STATE_DIR/api.sock --help, -h show help (default: false) --version, -v print the version (default: false) ``` e.g., `rootlessctl --socket /foo/bar/sock info --json` rootlesskit-0.14.6/docs/mount.md000066400000000000000000000007421417776672600166320ustar00rootroot00000000000000## Mount Propagation The mount namespace created by RootlessKit has `rprivate` propagation by default. Starting with v0.9.0, the propagation can be set to `rslave` by specifying `--propagation=rslave`. The propagation can be also set to `rshared`, but known not to work with `--copy-up`. Note that `rslave` and `rshared` do not work as expected when the host root filesystem isn't mounted with "shared". (Use `findmnt -n -l -o propagation /` to inspect the current mount flag.) 
rootlesskit-0.14.6/docs/network.md000066400000000000000000000160141417776672600171600ustar00rootroot00000000000000## Network Drivers RootlessKit provides several drivers for providing network connectivity: * `--net=host`: use host network namespace (default) * `--net=slirp4netns`: use [slirp4netns](https://github.com/rootless-containers/slirp4netns) (recommended) * `--net=vpnkit`: use [VPNKit](https://github.com/moby/vpnkit) * `--net=lxc-user-nic`: use `lxc-user-nic` (experimental) [Benchmark: iperf3 from the child to the parent (Mar 8, 2020)](https://github.com/rootless-containers/rootlesskit/runs/492498728): | Driver | MTU=1500 | MTU=65520 |---------------------------------------|------------|------------- |`slirp4netns` | 1.06 Gbps | 7.55 Gbps |`slirp4netns` (with sandbox + seccomp) | 1.05 Gbps | 7.21 Gbps |`vpnkit` | 0.60 Gbps |(Unsupported) |`lxc-user-nic` | 31.4 Gbps | 30.9 Gbps |(rootful veth) | (38.7 Gbps)| (40.8 Gbps) ### `--net=host` (default) `--net=host` does not isolate the network namespace from the host. Pros: * No performance overhead * Supports ICMP Echo (`ping`) when `/proc/sys/net/ipv4/ping_group_range` is configured Cons: * No permission for network-namespaced operations, e.g. creating iptables rules, running `tcpdump` To route ICMP Echo packets (`ping`), you need to write the range of GIDs to [`net.ipv4.ping_group_range`](http://man7.org/linux/man-pages/man7/icmp.7.html). ```console $ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range" ``` ### `--net=slirp4netns` (recommended) `--net=slirp4netns` isolates the network namespace from the host and launch [slirp4netns](https://github.com/rootless-containers/slirp4netns) for providing usermode networking. Pros: * Possible to perform network-namespaced operations, e.g. 
creating iptables rules, running `tcpdump` * Supports ICMP Echo (`ping`) when `/proc/sys/net/ipv4/ping_group_range` is configured * Supports hardening using mount namespace and seccomp (`--slirp4netns-sandbox=auto`, `--slirp4netns-seccomp=auto`, since RootlessKit v0.7.0, slirp4netns v0.4.0) * Supports IPv6 routing (`--ipv6`) Cons: * Extra performance overhead (but still faster than `--net=vpnkit`) * Supports only TCP, UDP, and ICMP Echo packets To use `--net=slirp4netns`, you need to install slirp4netns v0.4.0 or later. ```console $ sudo dnf install slirp4netns ``` or ```console $ sudo apt-get install slirp4netns ``` If a binary package is not available for your distribution, install from the source: ```console $ git clone https://github.com/rootless-containers/slirp4netns $ cd slirp4netns $ ./autogen.sh && ./configure && make $ cp slirp4netns ~/bin ``` The network is configured as follows by default: * IP: 10.0.2.100/24 * Gateway: 10.0.2.2 * DNS: 10.0.2.3 The network configuration can be changed by specifying a custom CIDR, e.g. `--cidr=10.0.3.0/24` (requires slirp4netns v0.3.0+). Specifying `--copy-up=/etc` is highly recommended unless `/etc/resolv.conf` on the host is statically configured. Otherwise `/etc/resolv.conf` in the RootlessKit's mount namespace will be unmounted when `/etc/resolv.conf` on the host is recreated, typically by NetworkManager or systemd-resolved. It is also highly recommended to specify `--disable-host-loopback`. Otherwise ports listening on 127.0.0.1 in the host are accessible as 10.0.2.2 in the RootlessKit's network namespace. 
Example session: ```console $ rootlesskit --net=slirp4netns --copy-up=/etc --disable-host-loopback bash rootlesskit$ ip a 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: tap0: mtu 65520 qdisc fq_codel state UP group default qlen 1000 link/ether 46:dc:8d:09:fd:f2 brd ff:ff:ff:ff:ff:ff inet 10.0.2.100/24 scope global tap0 valid_lft forever preferred_lft forever inet6 fe80::44dc:8dff:fe09:fdf2/64 scope link valid_lft forever preferred_lft forever rootlesskit$ ip r default via 10.0.2.2 dev tap0 10.0.2.0/24 dev tap0 proto kernel scope link src 10.0.2.100 rootlesskit$ cat /etc/resolv.conf nameserver 10.0.2.3 rootlesskit$ curl https://www.google.com ... ``` Starting with RootlessKit v0.7.0 + slirp4netns v0.4.0, `--slirp4netns-sandbox=auto/true/false` (enables mount namespace) and `--slirp4netns-seccomp=auto/true/false` (enables seccomp rules) can be used to harden the slirp4netns process. ### `--net=vpnkit` `--net=vpnkit` isolates the network namespace from the host and launches [VPNKit](https://github.com/moby/vpnkit) for providing usermode networking. Pros: * Possible to perform network-namespaced operations, e.g. creating iptables rules, running `tcpdump` Cons: * Extra performance overhead * Supports only TCP and UDP packets. No support for ICMP Echo (`ping`) unlike `--net=slirp4netns`, even if `/proc/sys/net/ipv4/ping_group_range` is configured. * No support for IPv6. To use `--net=vpnkit`, you need to install VPNKit. ```console $ git clone https://github.com/moby/vpnkit.git $ cd vpnkit $ make $ cp vpnkit.exe ~/bin/vpnkit ``` The network is configured as follows by default: * IP: 192.168.65.3/24 * Gateway: 192.168.65.1 * DNS: 192.168.65.1 As in `--net=slirp4netns`, specifying `--copy-up=/etc` and `--disable-host-loopback` is highly recommended. 
If `--disable-host-loopback` is not specified, ports listening on 127.0.0.1 in the host are accessible as 192.168.65.2 in the RootlessKit's network namespace. ### `--net=lxc-user-nic` (experimental) `--net=lxc-user-nic` isolates the network namespace from the host and launch [`lxc-user-nic(1)`](https://linuxcontainers.org/lxc/manpages/man1/lxc-user-nic.1.html) SUID binary for providing kernel-mode NAT. Pros: * The least performance overhead * Possible to perform network-namespaced operations, e.g. creating iptables rules, running `tcpdump` * Supports ICMP Echo (`ping`) without `/proc/sys/net/ipv4/ping_group_range` configuration Cons: * Less secure * Needs `/etc/lxc/lxc-usernet` configuration * No support for IPv6. To use `lxc-user-nic`, you need to install `liblxc-common` package: ```console $ sudo apt-get install liblxc-common ``` You also need to set up [`/etc/lxc/lxc-usernet`](https://linuxcontainers.org/lxc/manpages/man5/lxc-usernet.5.html): ``` # USERNAME TYPE BRIDGE COUNT penguin veth lxcbr0 1 ``` The `COUNT` value needs to be increased to run multiple RootlessKit instances with `--net=lxc-user-nic` simultaneously. It may take a few seconds to configure the interface using DHCP. If you start and stop RootlessKit too frequently, you might use up all available DHCP addresses. You might need to reset `/var/lib/misc/dnsmasq.lxcbr0.leases` and restart the `lxc-net` service. Currently, the MAC address is always set to a random address. ## IPv6 The `--ipv6` flag (since v0.14.0, EXPERIMENTAL) enables IPv6 routing for slirp4netns network driver. This flag is unrelated to port forwarding. rootlesskit-0.14.6/docs/port.md000066400000000000000000000050571417776672600164600ustar00rootroot00000000000000# Port Drivers To the ports in the network namespace to the host network namespace, `--port-driver` needs to be specified. The default value is `none` (do not expose ports). 
| `--port-driver` | Throughput | Source IP |----------------------|-------------|---------- | `slirp4netns` | 6.89 Gbps | Propagated | `socat` (Deprecated) | 7.80 Gbps | Always 127.0.0.1 | `builtin` | 30.0 Gbps | Always 127.0.0.1 ([Benchmark: iperf3 from the parent to the child (Mar 8, 2020)](https://github.com/rootless-containers/rootlesskit/runs/492498728)) The `builtin` driver is the fastest, but be aware that the source IP is not propagated and is always set to 127.0.0.1. ### Exposing ports For example, to expose 80 in the child as 8080 in the parent: ```console $ rootlesskit --state-dir=/run/user/1001/rootlesskit/foo --net=slirp4netns --disable-host-loopback --copy-up=/etc --port-driver=builtin bash rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock add-ports 0.0.0.0:8080:80/tcp 1 rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock list-ports ID PROTO PARENTIP PARENTPORT CHILDPORT 1 tcp 0.0.0.0 8080 80 rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock remove-ports 1 1 ``` You can also expose ports using `socat` and `nsenter` instead of RootlessKit's port drivers. ```console $ pid=$(cat /run/user/1001/rootlesskit/foo/child_pid) $ socat -t -- TCP-LISTEN:8080,reuseaddr,fork EXEC:"nsenter -U -n -t $pid socat -t -- STDIN TCP4\:127.0.0.1\:80" ``` ### Exposing privileged ports To expose privileged ports (< 1024), add `net.ipv4.ip_unprivileged_port_start=0` to `/etc/sysctl.conf` (or `/etc/sysctl.d`) and run `sudo sysctl --system`. If you are using the `builtin` driver, you can expose the privileged ports without changing the sysctl value, but you need to set `CAP_NET_BIND_SERVICE` on the `rootlesskit` binary. ```console $ sudo setcap cap_net_bind_service=ep $(command -v rootlesskit) ``` ### Note about IPv6 Specifying `0.0.0.0:8080:80/tcp` may cause listening on IPv6 as well as on IPv4. The same applies to `[::]:8080:80/tcp`. 
This behavior may sound weird but corresponds to [Go's behavior](https://github.com/golang/go/commit/071908f3d809245eda42bf6eab071c323c67b7d2), so this is not a bug. To specify IPv4 explicitly, use `tcp4` instead of `tcp`, e.g., `0.0.0.0:8080:80/tcp4`. To specify IPv6 explicitly, use `tcp6`, e.g., `[::]:8080:80/tcp6`. The `tcp4` and `tcp6` forms were introduced in RootlessKit v0.14.0. The `tcp6` is currently supported only for `builtin` port driver. rootlesskit-0.14.6/docs/process.md000066400000000000000000000020601417776672600171410ustar00rootroot00000000000000## PID Namespace When `--pidns` (since v0.5.0) is specified, RootlessKit executes the child process in a new PID namespace. The RootlessKit child process becomes the init (PID=1). When RootlessKit terminates, all the processes in the namespace are killed with `SIGKILL`. See also [`pid_namespaces(7)`](http://man7.org/linux/man-pages/man7/pid_namespaces.7.html). ## Cgroup Namespace When `--cgroupns` (since v0.10.0) is specified, RootlessKit executes the child process in a new cgroup namespace. ### Cgroup2 evacuation Cgroup2 evacuation is supported since v0.13.0. 
e.g., `systemd-run -p Delegate=yes --user -t rootlesskit --cgroupns --pidns --evacuate-cgroup2=evac --net=slirp4netns bash` When the current process belongs to `/foo` group (visible under `/sys/fs/cgroup/foo`) and evacuation group name is like `bar`, - All processes in the `/foo` group are moved to `/foo/bar` group, by writing PIDs into `/sys/fs/cgroup/foo/bar/cgroup.procs` - As many controllers as possible are enabled for `/foo/*` groups, by writing `/sys/fs/cgroup/foo/cgroup.subtree_control` rootlesskit-0.14.6/go.mod000066400000000000000000000012621417776672600153220ustar00rootroot00000000000000module github.com/rootless-containers/rootlesskit go 1.16 require ( github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect github.com/gofrs/flock v0.8.1 github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 github.com/insomniacslk/dhcp v0.0.0-20211026125128-ad197bcd36fd github.com/moby/sys/mountinfo v0.5.0 github.com/moby/vpnkit v0.5.0 github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sirupsen/logrus v1.8.1 github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 github.com/stretchr/testify v1.7.0 github.com/urfave/cli/v2 v2.3.0 golang.org/x/net v0.0.0-20211105192438-b53810dc28af // indirect golang.org/x/sys v0.0.0-20211107104306-e0b2ad06fe42 ) rootlesskit-0.14.6/go.sum000066400000000000000000000242711417776672600153540ustar00rootroot00000000000000github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fanliao/go-promise v0.0.0-20141029170127-1890db352a72/go.mod h1:PjfxuH4FZdUyfMdtBio2lsRr1AKEaVPwelzuHuh8Lqc= github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/hugelgupf/socketpair v0.0.0-20190730060125-05d35a94e714/go.mod h1:2Goc3h8EklBH5mspfHFxBnEoURQCGzQQH1ga9Myjvis= github.com/insomniacslk/dhcp v0.0.0-20211026125128-ad197bcd36fd h1:jupbuQFZtwOBg/3EmK91/rGaYFkqCb9bwHOnwn7Cav0= github.com/insomniacslk/dhcp v0.0.0-20211026125128-ad197bcd36fd/go.mod h1:h+MxyHxRg9NH3terB1nfRIUaQEcI0XOVkdR9LNBlp8E= github.com/jsimonetti/rtnetlink v0.0.0-20190606172950-9527aa82566a/go.mod h1:Oz+70psSo5OFh8DBl0Zv2ACw7Esh6pPUphlvZG9x7uw= github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4/go.mod h1:WGuG/smIU4J/54PblvSbh+xvCZmpJnFgr3ds6Z55XMQ= github.com/jsimonetti/rtnetlink v0.0.0-20201009170750-9c6f07d100c1/go.mod h1:hqoO/u39cqLeBLebZ8fWdE96O7FxrAsRYhnVOdgHxok= github.com/jsimonetti/rtnetlink 
v0.0.0-20201110080708-d2c240429e6c/go.mod h1:huN4d1phzjhlOsNIjFsw2SVRbwIHj3fJDMEU2SDPTmg= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/mdlayher/ethernet v0.0.0-20190606142754-0394541c37b7/go.mod h1:U6ZQobyTjI/tJyq2HG+i/dfSoFUt8/aZCM+GKtmFk/Y= github.com/mdlayher/netlink v0.0.0-20190409211403-11939a169225/go.mod h1:eQB3mZE4aiYnlUsyGGCOpPETfdQq4Jhsgf1fk3cwQaA= github.com/mdlayher/netlink v1.0.0/go.mod h1:KxeJAFOFLG6AjpyDkQ/iIhxygIUKD+vcwqcnu43w/+M= github.com/mdlayher/netlink v1.1.0/go.mod h1:H4WCitaheIsdF9yOYu8CFmCgQthAPIWZmcKp9uZHgmY= github.com/mdlayher/netlink v1.1.1/go.mod h1:WTYpFb/WTvlRJAyKhZL5/uy69TDDpHHu2VZmb2XgV7o= github.com/mdlayher/raw v0.0.0-20190606142536-fef19f00fc18/go.mod h1:7EpbotpCmVZcu+KCX4g9WaRNuu11uyhiW7+Le1dKawg= github.com/mdlayher/raw v0.0.0-20191009151244-50f2db8cc065/go.mod h1:7EpbotpCmVZcu+KCX4g9WaRNuu11uyhiW7+Le1dKawg= github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI= github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/moby/vpnkit v0.5.0 h1:VcDpS9y+PmT9itf+mH5Qdh9GME7ungLMt9yjf9o4REY= github.com/moby/vpnkit v0.5.0/go.mod h1:KyjUrL9cb6ZSNNAUwZfqRjhwwgJ3BJN+kXh0t43WTUQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 h1:TG/diQgUe0pntT/2D9tmUCz4VNwm9MfrtPr0SU2qSX8= github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8/go.mod h1:P5HUIBuIWKbyjl083/loAegFkfbFNx5i2qEP4CNbm7E= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/u-root/uio v0.0.0-20210528114334-82958018845c h1:BFvcl34IGnw8yvJi8hlqLFo9EshRInwWBs2M5fGWzQA= github.com/u-root/uio v0.0.0-20210528114334-82958018845c/go.mod h1:LpEX5FO/cB+WF4TYGY1V5qktpaZLkKkSegbr0V4eYXA= github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190419010253-1f3472d942ba/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191007182048-72f939374954/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20211105192438-b53810dc28af h1:SMeNJG/vclJ5wyBBd4xupMsSJIHTd1coW9g7q6KOjmY= golang.org/x/net v0.0.0-20211105192438-b53810dc28af/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190411185658-b44545bcd369/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190418153312-f0ce4c0180be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606122018-79a91cf218c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210525143221-35b2ab0089ea/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211107104306-e0b2ad06fe42 h1:G2DDmludOQZoWbpCr7OKDxnl478ZBGMcOhrv+ooX/Q4= golang.org/x/sys v0.0.0-20211107104306-e0b2ad06fe42/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
# Runs the iperf3 client inside RootlessKit with the slirp4netns network driver.
# All arguments are passed through to rootlesskit.
function benchmark::iperf3::slirp4netns() {
	INFO "[benchmark:iperf3] slirp4netns ($@)"
	set -x
	# 10.0.2.2 is the address the client connects to for the iperf3 server
	# that this script starts on the host.
	$ROOTLESSKIT --net=slirp4netns $@ -- $IPERF3C 10.0.2.2
	set +x
}

# Runs the iperf3 client inside RootlessKit with the vpnkit network driver.
# All arguments are passed through to rootlesskit.
function benchmark::iperf3::vpnkit() {
	INFO "[benchmark:iperf3] vpnkit ($@)"
	set -x
	# 192.168.65.2 is the address the client connects to for the iperf3 server
	# that this script starts on the host.
	$ROOTLESSKIT --net=vpnkit $@ -- $IPERF3C 192.168.65.2
	set +x
}

# Runs the iperf3 client inside RootlessKit with the lxc-user-nic network driver.
# All arguments are passed through to rootlesskit.
function benchmark::iperf3::lxc-user-nic() {
	INFO "[benchmark:iperf3] lxc-user-nic ($@)"
	dev=lxcbr0
	set -x
	# ignore "lxc-net is already running" error
	sudo /usr/lib/$(uname -m)-linux-gnu/lxc/lxc-net start || true
	# IPv4 address of the bridge device, with the "/prefix" part cut off;
	# this is the address the iperf3 client connects to.
	ip=$(ip -4 -o addr show $dev | awk '{print $4}' | cut -d "/" -f 1)
	$ROOTLESSKIT --net=lxc-user-nic $@ -- $IPERF3C $ip
	set +x
}

# Reference measurement without RootlessKit: creates a network namespace "foo"
# connected to the host through a veth pair (using sudo), runs the iperf3
# client inside it against the host end, then removes the link and namespace.
function benchmark::iperf3::rootful_veth() {
	INFO "[benchmark:iperf3] rootful_veth ($@) for reference"
	# only --mtu=MTU is expected in $@; strip the flag name to get the number
	mtu=$(echo $@ | sed -e s/--mtu=//g)
	set -x
	sudo ip netns add foo
	sudo ip link add foo_veth0 type veth peer name foo_veth1
	sudo ip link set foo_veth1 netns foo
	# host end: 10.0.42.1, namespace end: 10.0.42.2
	sudo ip addr add 10.0.42.1/24 dev foo_veth0
	sudo ip -netns foo addr add 10.0.42.2/24 dev foo_veth1
	sudo ip link set dev foo_veth0 mtu $mtu
	sudo ip -netns foo link set dev foo_veth1 mtu $mtu
	sudo ip link set foo_veth0 up
	sudo ip -netns foo link set foo_veth1 up
	sudo ip netns exec foo $IPERF3C 10.0.42.1
	# NOTE(review): the two cleanup commands below are not reached if an
	# earlier command fails while `set -e` from common.inc.sh is in effect.
	sudo ip link del foo_veth0
	sudo ip netns del foo
	set +x
}
# Benchmarks UDP port forwarding: starts an iperf3 server under RootlessKit,
# exposes port 5201 (tcp and udp) on 127.0.0.1 through rootlessctl, and runs
# the iperf3 client in UDP mode against it.
# Arguments: flags passed through to rootlesskit.
function benchmark::iperf3::port::udp() {
	statedir=$(mktemp -d)
	INFO "[benchmark:iperf3::port::udp] $@"
	# run the iperf3 server under rootlesskit in the background
	$ROOTLESSKIT --state-dir=$statedir $@ iperf3 -s >/dev/null &
	rkpid=$!
	# wait for socket to be available
	sleep 3
	rootlessctl="rootlessctl --socket=$statedir/api.sock"
	# the output of add-ports is kept so the same ports can be removed below
	portids=$($rootlessctl add-ports 127.0.0.1:5201:5201/tcp 127.0.0.1:5201:5201/udp)
	$rootlessctl list-ports
	sleep 3
	$IPERF3C 127.0.0.1 -u -b 100G
	$rootlessctl remove-ports $portids
	kill $rkpid
}
#!/bin/bash
# Common settings and helpers sourced by the scripts in this directory.

# Abort on errors, on unset variables, and on failures inside pipelines.
set -eu -o pipefail

# INFO prints its arguments to stdout, prefixed with a colored "[INFO]" tag.
# "$*" is used instead of "$@" so that the message is a single word; the
# printed text is the same because echo joins its arguments with spaces.
function INFO() {
	echo -e "\e[104m\e[97m[INFO]\e[49m\e[39m $*"
}

# ERROR prints its arguments to stderr, prefixed with a colored "[ERROR]" tag.
function ERROR() {
	echo >&2 -e "\e[101m\e[97m[ERROR]\e[49m\e[39m $*"
}

# Commands used by the benchmark and integration scripts.
ROOTLESSKIT="rootlesskit"
IPERF3C="iperf3 -t 30 -c"
# NOTE: extra sed is for eliminating tty escape sequence group="$(systemd-run --user -t -q -- $ROOTLESSKIT --cgroupns --pidns --evacuate-cgroup2=evac grep -oP '0::\K.*' /proc/self/cgroup | sed 's/[^[:print:]]//g')" if [ "$group" != "/evac" ]; then ERROR "expected group \"/evac\", got \"${group}\"." exit 1 fi rootlesskit-0.14.6/hack/integration-exit-code.sh000077500000000000000000000021121417776672600216560ustar00rootroot00000000000000#!/bin/bash source $(realpath $(dirname $0))/common.inc.sh function test_exit_code() { args="$@" INFO "Testig exit status for args=${args}" set +e for f in 0 42; do $ROOTLESSKIT $args sh -exc "exit $f" >/dev/null 2>&1 code=$? if [ $code != $f ]; then ERROR "expected code $f, got $code" exit 1 fi done } test_exit_code --pidns=false test_exit_code --pidns=true --reaper=auto test_exit_code --pidns=true --reaper=true test_exit_code --pidns=true --reaper=false function test_signal() { args="$@" INFO "Testig signal for args=${args}" set +e tmp=$(mktemp -d) $ROOTLESSKIT --state-dir=${tmp}/state $args sleep infinity >${tmp}/out 2>&1 & pid=$! sleep 1 kill -SIGUSR1 $(cat ${tmp}/state/child_pid) wait $pid code=$? if [ $code != 255 ]; then ERROR "expected code 255, got $code" exit 1 fi if ! grep -q "user defined signal 1" ${tmp}/out; then ERROR "didn't get SIGUSR1?" cat ${tmp}/out exit 1 fi rm -rf $tmp } test_signal --pidns=false test_signal --pidns=true --reaper=auto test_signal --pidns=true --reaper=true test_signal --pidns=true --reaper=false rootlesskit-0.14.6/hack/integration-ipv6.sh000077500000000000000000000010661417776672600206700ustar00rootroot00000000000000#!/bin/bash source $(realpath $(dirname $0))/common.inc.sh set -x parent_ipv6="fdaa:aaaa:aaaa::1" parent_dummy="dummy42" sudo ip link add ${parent_dummy} type dummy sudo ip link set dev ${parent_dummy} up sudo ip addr add "${parent_ipv6}/64" dev ${parent_dummy} tmp=$(mktemp -d) echo "hello ipv6" >${tmp}/index.html busybox httpd -f -p "[${parent_ipv6}]:8080" -h "${tmp}" & pid=$! 
# test_port PORT_DRIVER CURL_URL EXPECTATION [ROOTLESSKIT ARGS...]
#
# Serves a small index.html with busybox httpd (port 80) under RootlessKit
# using PORT_DRIVER, fetches CURL_URL once with curl, and compares the curl
# exit status with EXPECTATION ("should success" or "should fail").
# Any other EXPECTATION is reported as an internal error.
function test_port() {
	args="$@"
	port_driver="$1"
	curl_url="$2"
	expectation="$3"
	# drop the three positional parameters; the rest are rootlesskit flags
	shift
	shift
	shift
	rootlesskit_args="$@"
	INFO "Testing port_driver=\"${port_driver}\" curl_url=\"${curl_url}\" expectation=\"${expectation}\" rootlesskit_args=\"${rootlesskit_args}\""
	tmp=$(mktemp -d)
	state_dir=${tmp}/state
	html_dir=${tmp}/html
	mkdir -p ${html_dir}
	echo "test_port ($args)" >${html_dir}/index.html
	$ROOTLESSKIT \
		--state-dir=${state_dir} \
		--net=slirp4netns \
		--disable-host-loopback \
		--copy-up=/etc \
		--port-driver=${port_driver} \
		${rootlesskit_args} \
		busybox httpd -f -v -p 80 -h ${html_dir} \
		2>&1 &
	pid=$!
	sleep 1
	# errexit is suspended so a failing curl can be examined instead of aborting
	set +e
	curl -fsSL ${curl_url}
	code=$?
	set -e
	if [ "${expectation}" = "should success" ]; then
		if [ ${code} != 0 ]; then
			ERROR "curl exited with ${code}"
			exit ${code}
		fi
	elif [ "${expectation}" = "should fail" ]; then
		if [ ${code} = 0 ]; then
			ERROR "curl should not success"
			exit 1
		fi
	else
		ERROR "internal error"
		exit 1
	fi
	INFO "Test pasing, stopping httpd (\"exit status 255\" is negligible here)"
	# stop the process recorded in child_pid, then reap the background job;
	# its exit status is deliberately ignored
	kill -SIGTERM $(cat ${state_dir}/child_pid)
	wait $pid >/dev/null 2>&1 || true
	rm -rf $tmp
}
# test_propagation PROPAGATION
#
# Starts "sleep infinity" under RootlessKit with --propagation=PROPAGATION,
# mounts a tmpfs on the host after the child has started, and checks through
# /proc/$pid/root which files the child sees. Then runs a second, short
# rootlesskit invocation with --copy-up=/run for the modes where that is
# expected to work.
function test_propagation() {
	propagation=$1
	INFO "Testing --propagation=$propagation"
	d=$(mktemp -d)
	state=$d/state
	$ROOTLESSKIT --state-dir=$state --propagation=$propagation -- sleep infinity &
	job=$!
	# wait until rootlesskit has written the child PID
	until test -f $state/child_pid; do sleep 0.1; done
	pid=$(cat $state/child_pid)
	mkdir -p $d/a
	# before_mount lives in the underlying directory, after_mount in the tmpfs
	touch $d/a/before_mount
	sudo mount -t tmpfs none $d/a
	touch $d/a/after_mount
	case $propagation in
	private | rprivate)
		# the new mount must not be visible in the child
		test -f /proc/$pid/root/$d/a/before_mount
		test ! -f /proc/$pid/root/$d/a/after_mount
		;;
	slave | rslave | shared | rshared)
		# the new mount must be visible in the child
		test ! -f /proc/$pid/root/$d/a/before_mount
		test -f /proc/$pid/root/$d/a/after_mount
		;;
	*)
		ERROR "Unknown propagation $propagation"
		exit 1
		;;
	esac
	sudo umount $d/a
	kill $job
	wait
	rm -rf $d
	INFO "Testing --propagation=$propagation with copy-up"
	case $propagation in
	private | rprivate | slave | rslave)
		$ROOTLESSKIT --propagation=$propagation --copy-up=/run echo test
		;;
	shared | rshared)
		INFO "(skipping, because known not to work)"
		;;
	*)
		ERROR "Unknown propagation $propagation"
		exit 1
		;;
	esac
}
# x GOARCH UNAME_M
#
# Builds all binaries for GOARCH and packs the contents of bin/ into
# _artifact/rootlesskit-UNAME_M.tar.gz.
x() {
	goarch="$1"
	uname_m="$2"
	# start from a clean bin/ so the archive only holds this architecture
	rm -rf bin
	GOARCH="$goarch" make all
	# print the file type of each binary, hiding lines that mention "dynamic";
	# grep exits non-zero (aborting under "set -e") when no line is left
	file bin/* | grep -v dynamic
	# the subshell keeps the "cd" local to the tar invocation
	(cd bin && tar czvf "../_artifact/rootlesskit-${uname_m}.tar.gz" *)
}
// Client is a client for the RootlessKit REST API.
type Client interface {
	// HTTPClient returns the HTTP client used to send the API requests.
	HTTPClient() *http.Client
	// PortManager returns a port.Manager whose operations are carried out
	// through the API's /ports endpoints.
	PortManager() port.Manager
	// Info requests the /info endpoint and returns the decoded api.Info.
	Info(context.Context) (*api.Info, error)
}
// readAtMost reads r until EOF and returns at most maxBytes bytes.
//
// If r yields more than maxBytes bytes, the first maxBytes bytes are returned
// together with a non-nil error. An input of exactly maxBytes bytes is not an
// error. A negative maxBytes is treated as zero.
func readAtMost(r io.Reader, maxBytes int) ([]byte, error) {
	if maxBytes < 0 {
		maxBytes = 0
	}
	// Allow one byte beyond the limit: this is the only way to tell an input
	// of exactly maxBytes bytes apart from a longer one.
	lr := &io.LimitedReader{
		R: r,
		N: int64(maxBytes) + 1,
	}
	b, err := io.ReadAll(lr)
	tooLong := len(b) > maxBytes
	if tooLong {
		// never hand the probe byte to the caller
		b = b[:maxBytes]
	}
	if err != nil {
		return b, err
	}
	if tooLong {
		return b, fmt.Errorf("expected at most %d bytes, got more", maxBytes)
	}
	return b, nil
}
// Otherwise Error returns a human-readable string that contains e.StatusCode and e.Body. func (e *HTTPStatusError) Error() string { if e.Body != "" && len(e.Body) < HTTPStatusErrorBodyMaxLength { var ej api.ErrorJSON if json.Unmarshal([]byte(e.Body), &ej) == nil { return ej.Message } } return fmt.Sprintf("unexpected HTTP status %s, body=%q", http.StatusText(e.StatusCode), e.Body) } func successful(resp *http.Response) error { if resp == nil { return errors.New("nil response") } if resp.StatusCode/100 != 2 { b, _ := readAtMost(resp.Body, HTTPStatusErrorBodyMaxLength) return &HTTPStatusError{ StatusCode: resp.StatusCode, Body: string(b), } } return nil } type portManager struct { *client } func (pm *portManager) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) { m, err := json.Marshal(spec) if err != nil { return nil, err } u := fmt.Sprintf("http://%s/%s/ports", pm.client.dummyHost, pm.client.version) req, err := http.NewRequest("POST", u, bytes.NewReader(m)) if err != nil { return nil, err } req.Header.Set("Content-Type", "application/json") req = req.WithContext(ctx) resp, err := pm.client.HTTPClient().Do(req) if err != nil { return nil, err } defer resp.Body.Close() if err := successful(resp); err != nil { return nil, err } dec := json.NewDecoder(resp.Body) var status port.Status if err := dec.Decode(&status); err != nil { return nil, err } return &status, nil } func (pm *portManager) ListPorts(ctx context.Context) ([]port.Status, error) { u := fmt.Sprintf("http://%s/%s/ports", pm.client.dummyHost, pm.client.version) req, err := http.NewRequest("GET", u, nil) if err != nil { return nil, err } req = req.WithContext(ctx) resp, err := pm.client.HTTPClient().Do(req) if err != nil { return nil, err } defer resp.Body.Close() if err := successful(resp); err != nil { return nil, err } var statuses []port.Status dec := json.NewDecoder(resp.Body) if err := dec.Decode(&statuses); err != nil { return nil, err } return statuses, nil } func (pm *portManager) 
# When you make a change to this YAML, please validate it with https://editor.swagger.io
enum: - tcp - tcp4 - tcp6 - udp - udp4 - udp6 - sctp - sctp4 - sctp6 PortSpec: required: - proto properties: proto: $ref: '#/components/schemas/Proto' parentIP: type: string parentPort: type: integer format: int32 minimum: 1 maximum: 65535 childIP: type: string # future version may support requests with parentPort<=0 for automatic port assignment childPort: type: integer format: int32 minimum: 1 maximum: 65535 PortStatus: required: - id properties: id: type: integer format: int64 spec: $ref: '#/components/schemas/PortSpec' PortStatuses: type: array items: $ref: '#/components/schemas/PortStatus' # Info: API >= 1.1.0 Info: required: - apiVersion - version - stateDir - childPID properties: apiVersion: type: string description: "API version, without \"v\" prefix" example: "1.1.0" version: type: string description: "Implementation version, without \"v\" prefix" example: "0.42.0-beta.1+dev" stateDir: type: string description: "state dir" example: "/run/user/1000/rootlesskit" childPID: type: integer description: "child PID" example: 10042 networkDriver: $ref: '#/components/schemas/NetworkDriverInfo' portDriver: $ref: '#/components/schemas/PortDriverInfo' NetworkDriverInfo: required: - driver properties: driver: type: string description: "network driver. Empty when --net=host." 
example: "slirp4netns" # TODO: return TAP info dns: type: array description: "DNS addresses" items: type: string example: ["10.0.2.3"] childIP: type: string description: "Child IP (v4)" example: "10.0.2.100" dynamicChildIP: type: boolean description: "Child IP may change" PortDriverInfo: required: - driver - supportedProtos properties: driver: type: string description: "port driver" example: "builtin" protos: type: array description: "The supported protocol strings for listening ports" example: ["tcp","udp"] items: $ref: '#/components/schemas/Proto' disallowLoopbackChildIP: type: boolean description: "If this field is set to true, loopback IP such as 127.0.0.1 cannot be specified as a child IP" rootlesskit-0.14.6/pkg/api/router/000077500000000000000000000000001417776672600170655ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/api/router/router.go000066400000000000000000000102621417776672600207350ustar00rootroot00000000000000package router import ( "context" "encoding/json" "errors" "fmt" "net/http" "strconv" "github.com/gorilla/mux" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/version" ) // NetworkDriver is implemented by network.ParentDriver type NetworkDriver interface { Info(context.Context) (*api.NetworkDriverInfo, error) } // PortDriver is implemented by port.ParentDriver type PortDriver interface { Info(context.Context) (*api.PortDriverInfo, error) port.Manager } type Backend struct { StateDir string ChildPID int // NetworkDriver can be nil NetworkDriver NetworkDriver // PortDriver MUST be thread-safe. 
// PortDriver can be nil PortDriver PortDriver } func (b *Backend) onError(w http.ResponseWriter, r *http.Request, err error, ec int) { w.WriteHeader(ec) w.Header().Set("Content-Type", "application/json") // it is safe to return the err to the client, because the client is reliable e := api.ErrorJSON{ Message: err.Error(), } _ = json.NewEncoder(w).Encode(e) } func (b *Backend) onPortDriverNil(w http.ResponseWriter, r *http.Request) { b.onError(w, r, errors.New("no PortDriver is available"), http.StatusBadRequest) } // GetPorts is handler for GET /v{N}/ports func (b *Backend) GetPorts(w http.ResponseWriter, r *http.Request) { if b.PortDriver == nil { b.onPortDriverNil(w, r) return } ports, err := b.PortDriver.ListPorts(context.TODO()) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } m, err := json.Marshal(ports) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) w.Write(m) } // PostPort is the handler for POST /v{N}/ports func (b *Backend) PostPort(w http.ResponseWriter, r *http.Request) { if b.PortDriver == nil { b.onPortDriverNil(w, r) return } decoder := json.NewDecoder(r.Body) var portSpec port.Spec if err := decoder.Decode(&portSpec); err != nil { b.onError(w, r, err, http.StatusBadRequest) return } portStatus, err := b.PortDriver.AddPort(context.TODO(), portSpec) if err != nil { b.onError(w, r, err, http.StatusBadRequest) return } m, err := json.Marshal(portStatus) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusCreated) w.Write(m) } // DeletePort is the handler for POST /v{N}/ports/{id} func (b *Backend) DeletePort(w http.ResponseWriter, r *http.Request) { if b.PortDriver == nil { b.onPortDriverNil(w, r) return } idStr, ok := mux.Vars(r)["id"] if !ok { b.onError(w, r, errors.New("id not specified"), http.StatusBadRequest) 
return } id, err := strconv.Atoi(idStr) if err != nil { b.onError(w, r, fmt.Errorf("bad id %s: %w", idStr, err), http.StatusBadRequest) return } if err := b.PortDriver.RemovePort(context.TODO(), id); err != nil { b.onError(w, r, err, http.StatusBadRequest) return } w.WriteHeader(http.StatusOK) } func (b *Backend) GetInfo(w http.ResponseWriter, r *http.Request) { info := &api.Info{ APIVersion: api.Version, Version: version.Version, StateDir: b.StateDir, ChildPID: b.ChildPID, } if b.NetworkDriver != nil { ndInfo, err := b.NetworkDriver.Info(context.Background()) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } info.NetworkDriver = ndInfo } if b.PortDriver != nil { pdInfo, err := b.PortDriver.Info(context.Background()) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } info.PortDriver = pdInfo } m, err := json.Marshal(info) if err != nil { b.onError(w, r, err, http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) w.Write(m) } func AddRoutes(r *mux.Router, b *Backend) { v1 := r.PathPrefix("/v1").Subrouter() v1.Path("/ports").Methods("GET").HandlerFunc(b.GetPorts) v1.Path("/ports").Methods("POST").HandlerFunc(b.PostPort) v1.Path("/ports/{id}").Methods("DELETE").HandlerFunc(b.DeletePort) v1.Path("/info").Methods("GET").HandlerFunc(b.GetInfo) } rootlesskit-0.14.6/pkg/child/000077500000000000000000000000001417776672600160575ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/child/child.go000066400000000000000000000253041417776672600174750ustar00rootroot00000000000000package child import ( "context" "errors" "fmt" "os" "os/exec" "os/signal" "runtime" "strconv" "syscall" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/common" "github.com/rootless-containers/rootlesskit/pkg/copyup" "github.com/rootless-containers/rootlesskit/pkg/msgutil" "github.com/rootless-containers/rootlesskit/pkg/network" 
// createCmd builds the exec.Cmd for the target command.
//
// targetCmd[0] is the program and the remaining elements are its arguments.
// The command inherits the standard streams and the environment of the
// current process, and is sent SIGKILL when its parent dies (Pdeathsig).
// An error is returned when targetCmd is empty.
func createCmd(targetCmd []string) (*exec.Cmd, error) {
	if len(targetCmd) == 0 {
		// Indexing targetCmd[0] below would panic otherwise.
		return nil, errors.New("target command is not specified")
	}
	cmd := exec.Command(targetCmd[0], targetCmd[1:]...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = os.Environ()
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Pdeathsig: syscall.SIGKILL,
	}
	return cmd, nil
}
// https://github.com/rootless-containers/rootlesskit/pull/23#issuecomment-429292632 // https://github.com/torvalds/linux/blob/9f203e2f2f065cd74553e6474f0ae3675f39fb0f/fs/namespace.c#L3326-L3328 logrus.Warnf("failed to mount sysfs, falling back to read-only mount: %v", err) if err := unix.Mount("none", "/sys", "sysfs", uintptr(unix.MS_RDONLY), ""); err != nil { // when /sys/firmware is masked, even RO sysfs can't be mounted logrus.Warnf("failed to mount sysfs: %v", err) } } if evacuateCgroup2 { if err := unix.Mount("none", cgroupDir, "cgroup2", 0, ""); err != nil { return fmt.Errorf("failed to mount cgroup2 on %s: %w", cgroupDir, err) } } else { if err := unix.Mount(tmp, cgroupDir, "", uintptr(unix.MS_MOVE), ""); err != nil { return fmt.Errorf("failed to move mount point from %s to %s: %w", tmp, cgroupDir, err) } } return nil } func mountProcfs() error { if err := unix.Mount("none", "/proc", "proc", 0, ""); err != nil { logrus.Warnf("failed to mount procfs, falling back to read-only mount: %v", err) if err := unix.Mount("none", "/proc", "proc", uintptr(unix.MS_RDONLY), ""); err != nil { logrus.Warnf("failed to mount procfs: %v", err) } } return nil } func activateLoopback() error { cmds := [][]string{ {"ip", "link", "set", "lo", "up"}, } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return fmt.Errorf("executing %v: %w", cmds, err) } return nil } func activateDev(dev, ip string, netmask int, gateway string, mtu int) error { cmds := [][]string{ {"ip", "link", "set", dev, "up"}, {"ip", "link", "set", "dev", dev, "mtu", strconv.Itoa(mtu)}, {"ip", "addr", "add", ip + "/" + strconv.Itoa(netmask), "dev", dev}, {"ip", "route", "add", "default", "via", gateway, "dev", dev}, } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return fmt.Errorf("executing %v: %w", cmds, err) } return nil } func setupCopyDir(driver copyup.ChildDriver, dirs []string) (bool, error) { if driver != nil { etcWasCopied := false copied, err := 
driver.CopyUp(dirs) for _, d := range copied { if d == "/etc" { etcWasCopied = true break } } return etcWasCopied, err } if len(dirs) != 0 { return false, errors.New("copy-up driver is not specified") } return false, nil } func setupNet(msg common.Message, etcWasCopied bool, driver network.ChildDriver) error { // HostNetwork if driver == nil { return nil } if err := activateLoopback(); err != nil { return err } dev, err := driver.ConfigureNetworkChild(&msg.Network) if err != nil { return err } if err := activateDev(dev, msg.Network.IP, msg.Network.Netmask, msg.Network.Gateway, msg.Network.MTU); err != nil { return err } if etcWasCopied { if err := writeResolvConf(msg.Network.DNS); err != nil { return err } if err := writeEtcHosts(); err != nil { return err } } else { logrus.Warn("Mounting /etc/resolv.conf without copying-up /etc. " + "Note that /etc/resolv.conf in the namespace will be unmounted when it is recreated on the host. " + "Unless /etc/resolv.conf is statically configured, copying-up /etc is highly recommended. 
// Opt is the set of options passed to Child.
type Opt struct {
	PipeFDEnvKey    string              // needs to be set; name of the env var that holds the pipe FD
	TargetCmd       []string            // needs to be set
	NetworkDriver   network.ChildDriver // nil for HostNetwork
	CopyUpDriver    copyup.ChildDriver  // cannot be nil if len(CopyUpDirs) != 0
	CopyUpDirs      []string
	PortDriver      port.ChildDriver
	MountProcfs     bool   // needs to be set if (and only if) parent.Opt.CreatePIDNS is set
	Propagation     string // mount propagation type
	Reaper          bool
	EvacuateCgroup2 bool // needs to correspond to parent.Opt.EvacuateCgroup2
}
if err = syscall.Exec("/proc/self/exe", os.Args, os.Environ()); err != nil { return err } panic("should not reach here") } if msg.Stage != 1 { return fmt.Errorf("expected stage 1, got stage %d", msg.Stage) } // The parent calls child with Pdeathsig, but it is cleared when newuidmap SUID binary is called // https://github.com/rootless-containers/rootlesskit/issues/65#issuecomment-492343646 runtime.LockOSThread() err = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0) runtime.UnlockOSThread() if err != nil { return err } os.Unsetenv(opt.PipeFDEnvKey) if err := pipeR.Close(); err != nil { return fmt.Errorf("failed to close fd %d: %w", pipeFD, err) } if msg.StateDir == "" { return errors.New("got empty StateDir") } if err := setMountPropagation(opt.Propagation); err != nil { return err } etcWasCopied, err := setupCopyDir(opt.CopyUpDriver, opt.CopyUpDirs) if err != nil { return err } if err := mountSysfs(opt.NetworkDriver == nil, opt.EvacuateCgroup2); err != nil { return err } if err := setupNet(msg, etcWasCopied, opt.NetworkDriver); err != nil { return err } if opt.MountProcfs { if err := mountProcfs(); err != nil { return err } } portQuitCh := make(chan struct{}) portErrCh := make(chan error) if opt.PortDriver != nil { go func() { portErrCh <- opt.PortDriver.RunChildDriver(msg.Port.Opaque, portQuitCh) }() } cmd, err := createCmd(opt.TargetCmd) if err != nil { return err } if opt.Reaper { if err := runAndReap(cmd); err != nil { return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err) } } else { if err := cmd.Start(); err != nil { return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err) } sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid) defer sigproxysignal.StopCatch(sigc) if err := cmd.Wait(); err != nil { return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err) } } if opt.PortDriver != nil { portQuitCh <- struct{}{} return <-portErrCh } return nil } func setMountPropagation(propagation string) error { flags, ok := 
// runAndReap starts cmd in a new session and reaps exited children while it
// runs.
//
// SIGCHLD is subscribed to before the command is started. On every SIGCHLD
// the goroutine below calls reap; whenever the wait status of cmd itself has
// been collected, the outcome is sent on the result channel. The function
// returns the first outcome received: nil for exit status 0, otherwise a
// *reaperErr carrying the wait status.
func runAndReap(cmd *exec.Cmd) error {
	c := make(chan os.Signal, 32)
	signal.Notify(c, syscall.SIGCHLD)
	// NOTE(review): assumes cmd.SysProcAttr is non-nil (presumably set by createCmd) — confirm.
	cmd.SysProcAttr.Setsid = true
	if err := cmd.Start(); err != nil {
		return err
	}
	sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid)
	defer sigproxysignal.StopCatch(sigc)

	result := make(chan error)
	go func() {
		defer close(result)
		for cEntry := range c {
			logrus.Debugf("reaper: got signal %q", cEntry)
			// reap returns non-nil only when cmd's own process was collected.
			if wsPtr := reap(cmd.Process.Pid); wsPtr != nil {
				ws := *wsPtr
				if ws.Exited() && ws.ExitStatus() == 0 {
					result <- nil
					continue
				}
				var resultErr common.ErrorWithSys = &reaperErr{
					ws: ws,
				}
				result <- resultErr
			}
		}
	}()
	return <-result
}

// reap collects every child that has already exited, without blocking
// (Wait4 with pid -1 and WNOHANG), and returns the wait status of myPid if it
// was among them, or nil otherwise.
func reap(myPid int) *syscall.WaitStatus {
	var res *syscall.WaitStatus
	for {
		var ws syscall.WaitStatus
		pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, nil)
		logrus.Debugf("reaper: got ws=%+v, pid=%d, err=%+v", ws, pid, err)
		// Stop on error or when no further child was collected.
		if err != nil || pid <= 0 {
			break
		}
		if pid == myPid {
			res = &ws
		}
	}
	return res
}

// reaperErr reports a non-successful wait status of the reaped command.
type reaperErr struct {
	ws syscall.WaitStatus
}

// Sys returns the underlying syscall.WaitStatus.
func (e *reaperErr) Sys() interface{} {
	return e.ws
}

// Error describes the wait status: the exit status if the process exited,
// the signal if it was signaled, and the raw status value otherwise.
func (e *reaperErr) Error() string {
	if e.ws.Exited() {
		return fmt.Sprintf("exit status %d", e.ws.ExitStatus())
	}
	if e.ws.Signaled() {
		return fmt.Sprintf("signal: %s", e.ws.Signal())
	}
	return fmt.Sprintf("exited with WAITSTATUS=0x%08x", e.ws)
}
func generateEtcHosts() ([]byte, error) { etcHosts, err := os.ReadFile("/etc/hosts") if err != nil { return nil, err } hostname, err := os.Hostname() if err != nil { return nil, err } // FIXME: no need to add the entry if already added s := fmt.Sprintf("%s\n127.0.0.1 %s\n::1 %s\n", string(etcHosts), hostname, hostname) return []byte(s), nil } // writeEtcHosts is akin to writeResolvConf // TODO: dedupe func writeEtcHosts() error { newEtcHosts, err := generateEtcHosts() if err != nil { return err } // remove copied-up link _ = os.Remove("/etc/hosts") if err := os.WriteFile("/etc/hosts", newEtcHosts, 0644); err != nil { return fmt.Errorf("writing /etc/hosts: %w", err) } return nil } // mountEtcHosts is akin to mountResolvConf // TODO: dedupe func mountEtcHosts(tempDir string) error { newEtcHosts, err := generateEtcHosts() if err != nil { return err } myEtcHosts := filepath.Join(tempDir, "hosts") if err := os.WriteFile(myEtcHosts, newEtcHosts, 0644); err != nil { return fmt.Errorf("writing %s: %w", myEtcHosts, err) } if err := unix.Mount(myEtcHosts, "/etc/hosts", "", uintptr(unix.MS_BIND), ""); err != nil { return fmt.Errorf("failed to create bind mount /etc/hosts for %s: %w", myEtcHosts, err) } return nil } rootlesskit-0.14.6/pkg/child/resolvconf.go000066400000000000000000000026351417776672600205740ustar00rootroot00000000000000package child import ( "fmt" "os" "path/filepath" "golang.org/x/sys/unix" ) func generateResolvConf(dns string) []byte { return []byte("nameserver " + dns + "\n") } func writeResolvConf(dns string) error { // remove copied-up link _ = os.Remove("/etc/resolv.conf") if err := os.WriteFile("/etc/resolv.conf", generateResolvConf(dns), 0644); err != nil { return fmt.Errorf("writing %s: %w", "/etc/resolv.conf", err) } return nil } // mountResolvConf does not work when /etc/resolv.conf is a managed by // systemd or NetworkManager, because our bind-mounted /etc/resolv.conf (in our namespaces) // is unexpectedly unmounted when /etc/resolv.conf is 
// ErrorWithSys is implemented by *exec.ExitError and *child.reaperErr
type ErrorWithSys interface {
	error
	Sys() interface{}
}

// GetExecExitStatus extracts the exit status from err.
//
// It searches the whole error chain for an ErrorWithSys whose Sys() value is
// a syscall.WaitStatus, so the status is found no matter how many times the
// original error has been wrapped with %w. It returns (0, false) when err is
// nil, when no such error is in the chain, or when Sys() is not a
// syscall.WaitStatus.
func GetExecExitStatus(err error) (int, bool) {
	var exitErr ErrorWithSys
	if !errors.As(err, &exitErr) {
		return 0, false
	}
	status, ok := exitErr.Sys().(syscall.WaitStatus)
	if !ok {
		return 0, false
	}
	return status.ExitStatus(), true
}
x.Stdin = nil x.Stdout = o x.Stderr = o x.Env = env x.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } logrus.Debugf("executing %v", cmd) if err := x.Run(); err != nil { return err } } return nil } rootlesskit-0.14.6/pkg/common/message.go000066400000000000000000000013231417776672600202360ustar00rootroot00000000000000package common // Message is sent from the parent to the child // as JSON, with uint32le length header. type Message struct { Stage int // 0 for Message 0, 1 for Message 1 Message0 Message1 } // Message0 is sent after setting up idmap type Message0 struct { } // Message 1 is sent after setting up other stuff type Message1 struct { // StateDir cannot be empty StateDir string Network NetworkMessage Port PortMessage } // NetworkMessage is empty for HostNetwork. type NetworkMessage struct { Dev string IP string Netmask int Gateway string DNS string MTU int // Opaque strings are specific to driver Opaque map[string]string } type PortMessage struct { Opaque map[string]string } rootlesskit-0.14.6/pkg/copyup/000077500000000000000000000000001417776672600163135ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/copyup/copyup.go000066400000000000000000000001231417776672600201550ustar00rootroot00000000000000package copyup type ChildDriver interface { CopyUp([]string) ([]string, error) } rootlesskit-0.14.6/pkg/copyup/tmpfssymlink/000077500000000000000000000000001417776672600210535ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/copyup/tmpfssymlink/tmpfssymlink.go000066400000000000000000000046261417776672600241520ustar00rootroot00000000000000package tmpfssymlink import ( "errors" "fmt" "os" "path/filepath" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/copyup" ) func NewChildDriver() copyup.ChildDriver { return &childDriver{} } type childDriver struct { } func (d *childDriver) CopyUp(dirs []string) ([]string, error) { // we create bind0 outside of StateDir so as to allow // copying up /run with 
stateDir=/run/user/1001/rootlesskit/default. bind0, err := os.MkdirTemp("/tmp", "rootlesskit-b") if err != nil { return nil, fmt.Errorf("creating bind0 directory under /tmp: %w", err) } defer os.RemoveAll(bind0) var copied []string for _, d := range dirs { d := filepath.Clean(d) if d == "/tmp" { // TODO: we can support copy-up /tmp by changing bind0TempDir return copied, errors.New("/tmp cannot be copied up") } if err := unix.Mount(d, bind0, "", uintptr(unix.MS_BIND|unix.MS_REC), ""); err != nil { return copied, fmt.Errorf("failed to create bind mount on %s: %w", d, err) } if err := unix.Mount("none", d, "tmpfs", 0, ""); err != nil { return copied, fmt.Errorf("failed to mount tmpfs on %s: %w", d, err) } bind1, err := os.MkdirTemp(d, ".ro") if err != nil { return copied, fmt.Errorf("creating a directory under %s: %w", d, err) } if err := unix.Mount(bind0, bind1, "", uintptr(unix.MS_MOVE), ""); err != nil { return copied, fmt.Errorf("failed to move mount point from %s to %s: %w", bind0, bind1, err) } files, err := os.ReadDir(bind1) if err != nil { return copied, fmt.Errorf("reading dir %s: %w", bind1, err) } for _, f := range files { fFull := filepath.Join(bind1, f.Name()) var symlinkSrc string if f.Type()&os.ModeSymlink != 0 { symlinkSrc, err = os.Readlink(fFull) if err != nil { return copied, fmt.Errorf("reading dir %s: %w", fFull, err) } } else { symlinkSrc = filepath.Join(filepath.Base(bind1), f.Name()) } symlinkDst := filepath.Join(d, f.Name()) // `mount` may create extra `/etc/mtab` after mounting empty tmpfs on /etc // https://github.com/rootless-containers/rootlesskit/issues/45 if err = os.RemoveAll(symlinkDst); err != nil { return copied, fmt.Errorf("removing %s: %w", symlinkDst, err) } if err := os.Symlink(symlinkSrc, symlinkDst); err != nil { return copied, fmt.Errorf("symlinking %s to %s: %w", symlinkSrc, symlinkDst, err) } } copied = append(copied, d) } return copied, nil } 
const (
	// maxLength is the maximum size in bytes of the JSON payload.
	maxLength = 1 << 16
)

// MarshalToWriter encodes x as JSON and writes it to w, prefixed with the
// payload length as a little-endian uint32. It returns the number of bytes
// written, header included.
func MarshalToWriter(w io.Writer, x interface{}) (int, error) {
	b, err := json.Marshal(x)
	if err != nil {
		return 0, err
	}
	if len(b) > maxLength {
		return 0, fmt.Errorf("bad message length: %d (max: %d)", len(b), maxLength)
	}
	h := make([]byte, 4)
	binary.LittleEndian.PutUint32(h, uint32(len(b)))
	return w.Write(append(h, b...))
}

// UnmarshalFromReader reads one length-prefixed JSON message from r and
// decodes it into x. It returns the number of bytes consumed, header
// included.
//
// The header and the payload are read with io.ReadFull, because a single
// Read on a pipe or socket may legitimately return fewer bytes than
// requested. io.EOF is returned unchanged when the stream ends before the
// header or payload starts; a stream that ends part-way through either one
// yields an error wrapping io.ErrUnexpectedEOF.
func UnmarshalFromReader(r io.Reader, x interface{}) (int, error) {
	hdr := make([]byte, 4)
	n, err := io.ReadFull(r, hdr)
	if err != nil {
		if err == io.ErrUnexpectedEOF {
			return n, fmt.Errorf("read %d bytes, expected 4 bytes: %w", n, err)
		}
		return n, err
	}
	bLen := binary.LittleEndian.Uint32(hdr)
	if bLen > maxLength || bLen < 1 {
		return n, fmt.Errorf("bad message length: %d (max: %d)", bLen, maxLength)
	}
	b := make([]byte, bLen)
	n, err = io.ReadFull(r, b)
	if err != nil {
		if err == io.ErrUnexpectedEOF {
			return 4 + n, fmt.Errorf("read %d bytes, expected %d bytes: %w", n, bLen, err)
		}
		return 4 + n, err
	}
	return 4 + n, json.Unmarshal(b, x)
}

// Marshal returns x encoded as a length-prefixed JSON message.
func Marshal(x interface{}) ([]byte, error) {
	var b bytes.Buffer
	_, err := MarshalToWriter(&b, x)
	return b.Bytes(), err
}

// Unmarshal decodes the length-prefixed JSON message b into x. b must contain
// exactly one message; trailing bytes are an error.
func Unmarshal(b []byte, x interface{}) error {
	n, err := UnmarshalFromReader(bytes.NewReader(b), x)
	if err != nil {
		// Report the underlying failure rather than a derived length mismatch.
		return err
	}
	if n != len(b) {
		return fmt.Errorf("read %d bytes, expected %d bytes", n, len(b))
	}
	return nil
}
// AddIPInt returns the IPv4 address obtained by adding i to ip.
//
// i may be negative. An error is returned when ip is not an IPv4 address or
// when the result would fall outside the IPv4 address range
// (below 0.0.0.0 or above 255.255.255.255).
func AddIPInt(ip net.IP, i int) (net.IP, error) {
	// Keep the caller's value intact so it can be reported on failure.
	ip4 := ip.To4()
	if ip4 == nil {
		return nil, fmt.Errorf("expected IPv4 address, got %s", ip.String())
	}
	ui32 := binary.BigEndian.Uint32(ip4)
	resInt64 := int64(ui32) + int64(i)
	if resInt64 < 0 || resInt64 > int64(math.MaxUint32) {
		return nil, fmt.Errorf("%s + %d overflows", ip4.String(), i)
	}
	res := make(net.IP, 4)
	binary.BigEndian.PutUint32(res, uint32(resInt64))
	return res, nil
}
rootlesskit-0.14.6/pkg/network/lxcusernic/000077500000000000000000000000001417776672600206445ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/network/lxcusernic/lxcusernic.go000066400000000000000000000107471417776672600233630ustar00rootroot00000000000000package lxcusernic import ( "context" "errors" "fmt" "net" "os" "os/exec" "strconv" "strings" "time" "github.com/insomniacslk/dhcp/dhcpv4" "github.com/insomniacslk/dhcp/dhcpv4/client4" "github.com/sirupsen/logrus" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/common" "github.com/rootless-containers/rootlesskit/pkg/network" ) func NewParentDriver(binary string, mtu int, bridge, ifname string) (network.ParentDriver, error) { if binary == "" { return nil, errors.New("got empty binary") } if mtu < 0 { return nil, errors.New("got negative mtu") } if mtu == 0 { mtu = 1500 } if bridge == "" { return nil, errors.New("got empty bridge") } if ifname == "" { ifname = "eth0" } return &parentDriver{ binary: binary, mtu: mtu, bridge: bridge, ifname: ifname, }, nil } type parentDriver struct { binary string mtu int bridge string ifname string } const DriverName = "lxc-user-nic" func (d *parentDriver) Info(ctx context.Context) (*api.NetworkDriverInfo, error) { return &api.NetworkDriverInfo{ Driver: DriverName, // TODO: fill DNS // TODO: fill IP DynamicChildIP: true, }, nil } func (d *parentDriver) MTU() int { return d.mtu } func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { var cleanups []func() error dummyLXCPath := "/dev/null" dummyLXCName := "dummy" cmd := exec.Command(d.binary, "create", dummyLXCPath, dummyLXCName, strconv.Itoa(childPID), "veth", d.bridge, d.ifname) b, err := cmd.CombinedOutput() if err != nil { return nil, common.Seq(cleanups), fmt.Errorf("%s failed: %s: %w", d.binary, string(b), err) } netmsg := common.NetworkMessage{ Dev: d.ifname, // IP, Netmask, Gateway, and DNS are configured in 
Child (via DHCP) MTU: d.mtu, } return &netmsg, common.Seq(cleanups), nil } func NewChildDriver() network.ChildDriver { return &childDriver{} } type childDriver struct { } func exchangeDHCP(c *client4.Client, dev string) (*dhcpv4.DHCPv4, error) { logrus.Debugf("exchanging DHCP messages using %s, may take a few seconds", dev) var ( ps []*dhcpv4.DHCPv4 err error ) for { ps, err = c.Exchange(dev) if err != nil { // `github.com/insomniacslk/dhcp` does not use errors.Wrap, // so we need to compare the string. if strings.Contains(err.Error(), "interrupted system call") { // Retry on EINTR continue } return nil, fmt.Errorf("could not exchange DHCP with %s: %w", dev, err) } break } if len(ps) < 1 { return nil, errors.New("got empty DHCP message") } var ack *dhcpv4.DHCPv4 for i, p := range ps { logrus.Debugf("DHCP message %d: %s", i, p.Summary()) if p.MessageType() == dhcpv4.MessageTypeAck { ack = p } } if ack == nil { return nil, errors.New("did not get DHCPACK") } return ack, nil } func (d *childDriver) ConfigureNetworkChild(netmsg *common.NetworkMessage) (string, error) { dev := netmsg.Dev if dev == "" { return "", errors.New("could not determine the dev") } cmds := [][]string{ // FIXME(AkihiroSuda): this should be moved to pkg/child? 
{"ip", "link", "set", dev, "up"}, } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return "", fmt.Errorf("executing %v: %w", cmds, err) } c := client4.NewClient() c.ReadTimeout = 30 * time.Second c.WriteTimeout = 30 * time.Second p, err := exchangeDHCP(c, dev) if err != nil { return "", err } if p.YourIPAddr.Equal(net.IPv4zero) { return "", errors.New("got zero YourIPAddr") } if len(p.Router()) == 0 { return "", errors.New("got no Router") } if len(p.DNS()) == 0 { return "", errors.New("got no DNS") } netmsg.IP = p.YourIPAddr.To4().String() netmask, _ := p.SubnetMask().Size() netmsg.Netmask = netmask netmsg.Gateway = p.Router()[0].To4().String() netmsg.DNS = p.DNS()[0].To4().String() go dhcpRenewRoutine(c, dev, p.YourIPAddr.To4(), p.IPAddressLeaseTime(time.Hour)) return dev, nil } func dhcpRenewRoutine(c *client4.Client, dev string, initialIP net.IP, lease time.Duration) { for { if lease <= 0 { return } logrus.Debugf("DHCP lease=%s, sleeping lease * 0.9", lease) time.Sleep(time.Duration(float64(lease) * 0.9)) p, err := exchangeDHCP(c, dev) if err != nil { panic(err) } ip := p.YourIPAddr.To4() if !ip.Equal(initialIP) { // FIXME(AkihiroSuda): unlikely to happen for LXC usecase but good to consider supporting panic(fmt.Errorf("expected to retain %s, got %s", initialIP, ip)) } lease = p.IPAddressLeaseTime(lease) } } rootlesskit-0.14.6/pkg/network/network.go000066400000000000000000000013561417776672600205120ustar00rootroot00000000000000package network import ( "context" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/common" ) // ParentDriver is called from the parent namespace type ParentDriver interface { Info(ctx context.Context) (*api.NetworkDriverInfo, error) // MTU returns MTU MTU() int // ConfigureNetwork sets up Slirp, updates msg, and returns destructor function. 
ConfigureNetwork(childPID int, stateDir string) (netmsg *common.NetworkMessage, cleanup func() error, err error) } // ChildDriver is called from the child namespace type ChildDriver interface { // netmsg MAY be modified. // devName is like "tap" or "eth0" ConfigureNetworkChild(netmsg *common.NetworkMessage) (devName string, err error) } rootlesskit-0.14.6/pkg/network/parentutils/000077500000000000000000000000001417776672600210375ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/network/parentutils/parentutils.go000066400000000000000000000011541417776672600237410ustar00rootroot00000000000000package parentutils import ( "fmt" "os" "strconv" "github.com/rootless-containers/rootlesskit/pkg/common" ) func PrepareTap(pid int, tap string) error { cmds := [][]string{ nsenter(pid, []string{"ip", "tuntap", "add", "name", tap, "mode", "tap"}), nsenter(pid, []string{"ip", "link", "set", tap, "up"}), } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return fmt.Errorf("executing %v: %w", cmds, err) } return nil } func nsenter(pid int, cmd []string) []string { return append([]string{"nsenter", "-t", strconv.Itoa(pid), "-n", "-m", "-U", "--preserve-credentials"}, cmd...) 
// Features describes which optional capabilities are available, as detected
// by DetectFeatures from the output of `slirp4netns --help` and from probing
// the kernel with prctl.
type Features struct {
	// SupportsEnableIPv6 --enable-ipv6 (v0.2.0)
	SupportsEnableIPv6 bool
	// SupportsCIDR --cidr (v0.3.0)
	SupportsCIDR bool
	// SupportsDisableHostLoopback --disable-host-loopback (v0.3.0)
	SupportsDisableHostLoopback bool
	// SupportsAPISocket --api-socket (v0.3.0)
	SupportsAPISocket bool
	// SupportsEnableSandbox --enable-sandbox (v0.4.0)
	SupportsEnableSandbox bool
	// SupportsEnableSeccomp --enable-seccomp (v0.4.0)
	SupportsEnableSeccomp bool
	// KernelSupportsEnableSeccomp whether the kernel supports slirp4netns --enable-seccomp
	KernelSupportsEnableSeccomp bool
}
https://github.com/rootless-containers/rootlesskit/issues/143 return nil, errors.New("slirp4netns seems older than v0.4.0") } kernelSupportsEnableSeccomp := false if unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0) != unix.EINVAL { kernelSupportsEnableSeccomp = unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0) != unix.EINVAL } f := Features{ SupportsEnableIPv6: strings.Contains(s, "--enable-ipv6"), SupportsCIDR: strings.Contains(s, "--cidr"), SupportsDisableHostLoopback: strings.Contains(s, "--disable-host-loopback"), SupportsAPISocket: strings.Contains(s, "--api-socket"), SupportsEnableSandbox: strings.Contains(s, "--enable-sandbox"), SupportsEnableSeccomp: strings.Contains(s, "--enable-seccomp"), KernelSupportsEnableSeccomp: kernelSupportsEnableSeccomp, } return &f, nil } // NewParentDriver instantiates new parent driver. // Requires slirp4netns v0.4.0 or later. func NewParentDriver(logWriter io.Writer, binary string, mtu int, ipnet *net.IPNet, ifname string, disableHostLoopback bool, apiSocketPath string, enableSandbox, enableSeccomp, enableIPv6 bool) (network.ParentDriver, error) { if binary == "" { return nil, errors.New("got empty slirp4netns binary") } if mtu < 0 { return nil, errors.New("got negative mtu") } if mtu == 0 { mtu = 65520 } if ifname == "" { ifname = "tap0" } features, err := DetectFeatures(binary) if err != nil { return nil, err } if enableIPv6 && !features.SupportsEnableIPv6 { return nil, errors.New("this version of slirp4netns does not support --enable-sandbox") } if ipnet != nil && !features.SupportsCIDR { return nil, errors.New("this version of slirp4netns does not support --cidr") } if disableHostLoopback && !features.SupportsDisableHostLoopback { return nil, errors.New("this version of slirp4netns does not support --disable-host-loopback") } if apiSocketPath != "" && !features.SupportsAPISocket { return nil, errors.New("this version of slirp4netns does not support --api-socket") } if enableSandbox && 
!features.SupportsEnableSandbox { return nil, errors.New("this version of slirp4netns does not support --enable-sandbox") } if enableSeccomp && !features.SupportsEnableSeccomp { return nil, errors.New("this version of slirp4netns does not support --enable-seccomp") } if enableSeccomp && !features.KernelSupportsEnableSeccomp { return nil, errors.New("kernel does not support seccomp") } return &parentDriver{ logWriter: logWriter, binary: binary, mtu: mtu, ipnet: ipnet, disableHostLoopback: disableHostLoopback, apiSocketPath: apiSocketPath, enableSandbox: enableSandbox, enableSeccomp: enableSeccomp, enableIPv6: enableIPv6, ifname: ifname, }, nil } type parentDriver struct { logWriter io.Writer binary string mtu int ipnet *net.IPNet disableHostLoopback bool apiSocketPath string enableSandbox bool enableSeccomp bool enableIPv6 bool ifname string infoMu sync.RWMutex info func() *api.NetworkDriverInfo } const DriverName = "slirp4netns" func (d *parentDriver) Info(ctx context.Context) (*api.NetworkDriverInfo, error) { d.infoMu.RLock() infoFn := d.info d.infoMu.RUnlock() if infoFn == nil { return &api.NetworkDriverInfo{ Driver: DriverName, }, nil } return infoFn(), nil } func (d *parentDriver) MTU() int { return d.mtu } func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { tap := d.ifname var cleanups []func() error if err := parentutils.PrepareTap(childPID, tap); err != nil { return nil, common.Seq(cleanups), fmt.Errorf("setting up tap %s: %w", tap, err) } readyR, readyW, err := os.Pipe() if err != nil { return nil, common.Seq(cleanups), err } defer readyR.Close() defer readyW.Close() // -r: readyFD (requires slirp4netns >= v0.4.0: https://github.com/rootless-containers/rootlesskit/issues/143) opts := []string{"--mtu", strconv.Itoa(d.mtu), "-r", "3"} if d.disableHostLoopback { opts = append(opts, "--disable-host-loopback") } if d.ipnet != nil { opts = append(opts, "--cidr", d.ipnet.String()) } if 
d.apiSocketPath != "" { opts = append(opts, "--api-socket", d.apiSocketPath) } if d.enableSandbox { opts = append(opts, "--enable-sandbox") } if d.enableSeccomp { opts = append(opts, "--enable-seccomp") } if d.enableIPv6 { opts = append(opts, "--enable-ipv6") } cmd := exec.Command(d.binary, append(opts, []string{strconv.Itoa(childPID), tap}...)...) // FIXME: Stdout doen't seem captured cmd.Stdout = d.logWriter cmd.Stderr = d.logWriter cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } cmd.ExtraFiles = append(cmd.ExtraFiles, readyW) cleanups = append(cleanups, func() error { logrus.Debugf("killing slirp4netns") if cmd.Process != nil { _ = cmd.Process.Kill() } wErr := cmd.Wait() logrus.Debugf("killed slirp4netns: %v", wErr) return nil }) if err := cmd.Start(); err != nil { return nil, common.Seq(cleanups), fmt.Errorf("executing %v: %w", cmd, err) } if err := waitForReadyFD(cmd.Process.Pid, readyR); err != nil { return nil, common.Seq(cleanups), fmt.Errorf("waiting for ready fd (%v): %w", cmd, err) } netmsg := common.NetworkMessage{ Dev: tap, MTU: d.mtu, } if d.ipnet != nil { // TODO: get the actual configuration via slirp4netns API? 
// waitForReadyFD blocks until slirp4netns reports readiness by writing to the
// pipe r.
//
// Each iteration arms a one-second read deadline on r. A successful read means
// slirp4netns is ready and nil is returned. When the read times out, the
// process cmdPid is checked with a non-blocking wait4(2): if it has exited or
// was killed by a signal an error is returned, otherwise the read is retried.
// Any read error other than a timeout is returned to the caller.
//
// Adapted from libpod:
// https://github.com/containers/libpod/blob/e6b843312b93ddaf99d0ef94a7e60ff66bc0eac8/libpod/networking_linux.go#L272-L308
func waitForReadyFD(cmdPid int, r *os.File) error {
	buf := make([]byte, 16)
	for {
		deadline := time.Now().Add(1 * time.Second)
		if err := r.SetDeadline(deadline); err != nil {
			return fmt.Errorf("error setting slirp4netns pipe timeout: %w", err)
		}

		_, readErr := r.Read(buf)
		if readErr == nil {
			return nil
		}
		if !os.IsTimeout(readErr) {
			return fmt.Errorf("failed to read from slirp4netns sync pipe: %w", readErr)
		}

		// The read timed out: check if the process is still running.
		var ws syscall.WaitStatus
		reaped, err := syscall.Wait4(cmdPid, &ws, syscall.WNOHANG, nil)
		if err != nil {
			return fmt.Errorf("failed to read slirp4netns process status: %w", err)
		}
		switch {
		case reaped != cmdPid:
			// No state change was reported for cmdPid; keep waiting.
		case ws.Exited():
			return errors.New("slirp4netns failed")
		case ws.Signaled():
			return errors.New("slirp4netns killed by signal")
		}
	}
}
16425 } if ifname == "" { ifname = "tap0" } return &parentDriver{ binary: binary, mtu: mtu, ifname: ifname, disableHostLoopback: disableHostLoopback, } } const ( DriverName = "vpnkit" opaqueMAC = "vpnkit.mac" opaqueSocket = "vpnkit.socket" opaqueUUID = "vpnkit.uuid" ) type parentDriver struct { binary string mtu int ifname string disableHostLoopback bool infoMu sync.RWMutex info func() *api.NetworkDriverInfo } func (d *parentDriver) Info(ctx context.Context) (*api.NetworkDriverInfo, error) { d.infoMu.RLock() infoFn := d.info d.infoMu.RUnlock() if infoFn == nil { return &api.NetworkDriverInfo{ Driver: DriverName, }, nil } return infoFn(), nil } func (d *parentDriver) MTU() int { return d.mtu } func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.NetworkMessage, func() error, error) { var cleanups []func() error vpnkitSocket := filepath.Join(stateDir, "vpnkit-ethernet.sock") vpnkitCtx, vpnkitCancel := context.WithCancel(context.Background()) vpnkitCmd := exec.CommandContext(vpnkitCtx, d.binary, "--ethernet", vpnkitSocket, "--mtu", strconv.Itoa(d.mtu)) if d.disableHostLoopback { vpnkitCmd.Args = append(vpnkitCmd.Args, "--host-ip", "0.0.0.0") } vpnkitCmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } cleanups = append(cleanups, func() error { logrus.Debugf("killing vpnkit") vpnkitCancel() wErr := vpnkitCmd.Wait() logrus.Debugf("killed vpnkit: %v", wErr) return nil }) if err := vpnkitCmd.Start(); err != nil { return nil, common.Seq(cleanups), fmt.Errorf("executing %v: %w", vpnkitCmd, err) } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) cleanups = append(cleanups, func() error { cancel(); return nil }) vmnet, err := waitForVPNKit(ctx, vpnkitSocket) if err != nil { return nil, common.Seq(cleanups), fmt.Errorf("connecting to %s: %w", vpnkitSocket, err) } cleanups = append(cleanups, func() error { return vmnet.Close() }) vifUUID := uuid.New() logrus.Debugf("connecting to VPNKit vmnet at %s as %s", 
vpnkitSocket, vifUUID) // No context.WithTimeout..? vif, err := vmnet.ConnectVif(vifUUID) if err != nil { return nil, common.Seq(cleanups), fmt.Errorf("connecting to %s with uuid %s: %w", vpnkitSocket, vifUUID, err) } logrus.Debugf("connected to VPNKit vmnet") // TODO: support configuration netmsg := common.NetworkMessage{ Dev: d.ifname, IP: vif.IP.String(), Netmask: 24, Gateway: "192.168.65.1", DNS: "192.168.65.1", MTU: d.mtu, Opaque: map[string]string{ opaqueMAC: vif.ClientMAC.String(), opaqueSocket: vpnkitSocket, opaqueUUID: vifUUID.String(), }, } d.infoMu.Lock() d.info = func() *api.NetworkDriverInfo { return &api.NetworkDriverInfo{ Driver: DriverName, DNS: []net.IP{net.ParseIP(netmsg.DNS)}, ChildIP: net.ParseIP(netmsg.IP), DynamicChildIP: false, } } d.infoMu.Unlock() return &netmsg, common.Seq(cleanups), nil } func waitForVPNKit(ctx context.Context, socket string) (*vmnet.Vmnet, error) { retried := 0 for { vmnet, err := vmnet.New(ctx, socket) if err == nil { return vmnet, nil } sleepTime := (retried % 100) * 10 * int(time.Microsecond) select { case <-ctx.Done(): return nil, fmt.Errorf("last error: %v: %w", err, ctx.Err()) case <-time.After(time.Duration(sleepTime)): } retried++ } } func NewChildDriver() network.ChildDriver { return &childDriver{} } type childDriver struct { } func (d *childDriver) ConfigureNetworkChild(netmsg *common.NetworkMessage) (tap string, err error) { tapName := netmsg.Dev if tapName == "" { return "", errors.New("no dev is set") } macStr := netmsg.Opaque[opaqueMAC] socket := netmsg.Opaque[opaqueSocket] uuidStr := netmsg.Opaque[opaqueUUID] if macStr == "" { return "", errors.New("no VPNKit MAC is set") } if socket == "" { return "", errors.New("no VPNKit socket is set") } if uuidStr == "" { return "", errors.New("no VPNKit UUID is set") } return startVPNKitRoutines(context.TODO(), tapName, macStr, socket, uuidStr) } func startVPNKitRoutines(ctx context.Context, tapName, macStr, socket, uuidStr string) (string, error) { cmds := 
[][]string{ {"ip", "tuntap", "add", "name", tapName, "mode", "tap"}, {"ip", "link", "set", tapName, "address", macStr}, // IP stuff and MTU are configured in activateTap() in pkg/child/child.go } if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil { return "", fmt.Errorf("executing %v: %w", cmds, err) } tap, err := water.New( water.Config{ DeviceType: water.TAP, PlatformSpecificParams: water.PlatformSpecificParams{ Name: tapName, }, }) if err != nil { return "", fmt.Errorf("creating tap %s: %w", tapName, err) } if tap.Name() != tapName { return "", fmt.Errorf("expected %q, got %q: %w", tapName, tap.Name(), err) } vmnet, err := vmnet.New(ctx, socket) if err != nil { return "", err } vifUUID, err := uuid.Parse(uuidStr) if err != nil { return "", err } vif, err := vmnet.ConnectVif(vifUUID) if err != nil { return "", err } go tap2vif(vif, tap) go vif2tap(tap, vif) return tapName, nil } func tap2vif(vif *vmnet.Vif, r io.Reader) { b := make([]byte, 65536) for { n, err := r.Read(b) if err != nil { if errors.Is(err, io.EOF) { return } panic(fmt.Errorf("tap2vif: read: %w", err)) } if err := vif.Write(b[:n]); err != nil { if errors.Is(err, io.EOF) { return } panic(fmt.Errorf("tap2vif: write: %w", err)) } } } func vif2tap(w io.Writer, vif *vmnet.Vif) { for { b, err := vif.Read() if err != nil { if errors.Is(err, io.EOF) { return } panic(fmt.Errorf("vif2tap: read: %w", err)) } if _, err := w.Write(b); err != nil { if errors.Is(err, io.EOF) { return } panic(fmt.Errorf("vif2tap: write: %w", err)) } } } rootlesskit-0.14.6/pkg/parent/000077500000000000000000000000001417776672600162655ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/parent/cgrouputil/000077500000000000000000000000001417776672600204625ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/parent/cgrouputil/cgrouputil.go000066400000000000000000000065361417776672600232200ustar00rootroot00000000000000package cgrouputil import ( "errors" "fmt" "os" "path/filepath" "strings" "github.com/moby/sys/mountinfo" 
"github.com/sirupsen/logrus" ) // EvacuateCgroup2 evacuates cgroup2. Must be called in the parent PID namespace. // // When the current process belongs to "/foo" group (visible under "/sys/fs/cgroup/foo") and evac is like "bar", // - All processes in the "/foo" group are moved to "/foo/bar" group, by writing PIDs into "/sys/fs/cgroup/foo/bar/cgroup.procs" // - As many controllers as possible are enabled for "/foo/*" groups, by writing "/sys/fs/cgroup/foo/cgroup.subtree_control" // // Returns nil when cgroup2 is not enabled. // Ported from https://github.com/rootless-containers/usernetes/commit/46ad812db7489914897ff8b1774f2fab0efda62b func EvacuateCgroup2(evac string) error { if evac == "" { return errors.New("got empty evacuation group name") } if strings.Contains(evac, "/") { return fmt.Errorf("unexpected evacuation group name %q: must not contain \"/\"", evac) } mountpoint := findCgroup2Mountpoint() if mountpoint == "" { logrus.Warn("cgroup2 is not mounted. cgroup2 evacuation is discarded.") return nil } oldGroup := getCgroup2(os.Getpid()) if mountpoint == "" { logrus.Warn("process is not running with cgroup2. 
cgroup2 evacuation is discarded.") return nil } newGroup := filepath.Join(oldGroup, evac) oldPath := filepath.Join(mountpoint, oldGroup) newPath := filepath.Join(mountpoint, newGroup) if err := os.MkdirAll(newPath, 0755); err != nil { return err } // evacuate existing procs from oldGroup to newGroup, so that we can enable all controllers including threaded ones cgroupProcsBytes, err := os.ReadFile(filepath.Join(oldPath, "cgroup.procs")) if err != nil { return err } for _, pidStr := range strings.Split(string(cgroupProcsBytes), "\n") { if pidStr == "" || pidStr == "0" { continue } if err := os.WriteFile(filepath.Join(newPath, "cgroup.procs"), []byte(pidStr), 0644); err != nil { logrus.WithError(err).Warnf("failed to move process %s to cgroup %q", pidStr, newGroup) } } // enable controllers for all subgroups under the oldGroup controllerBytes, err := os.ReadFile(filepath.Join(oldPath, "cgroup.controllers")) if err != nil { return err } for _, controller := range strings.Fields(string(controllerBytes)) { logrus.Debugf("enabling controller %q", controller) if err := os.WriteFile(filepath.Join(oldPath, "cgroup.subtree_control"), []byte("+"+controller), 0644); err != nil { logrus.WithError(err).Warnf("failed to enable controller %q", controller) } } return nil } func findCgroup2Mountpoint() string { f := mountinfoFSTypeFilter("cgroup2") mounts, err := mountinfo.GetMounts(f) if err != nil { logrus.WithError(err).Warn("failed to find mountpoint for cgroup2") return "" } if len(mounts) == 0 { return "" } if len(mounts) != 1 { logrus.Warnf("expected single mountpoint for cgroup2, got %d", len(mounts)) } return mounts[0].Mountpoint } func getCgroup2(pid int) string { p := fmt.Sprintf("/proc/%d/cgroup", pid) b, err := os.ReadFile(p) if err != nil { logrus.WithError(err).Warnf("failed to read %q", p) return "" } return getCgroup2FromProcPidCgroup(b) } func getCgroup2FromProcPidCgroup(b []byte) string { for _, l := range strings.Split(string(b), "\n") { if strings.HasPrefix(l, 
"0::") { return strings.TrimPrefix(l, "0::") } } return "" } rootlesskit-0.14.6/pkg/parent/cgrouputil/moby_sys_mountinfo_tmphack.go000066400000000000000000000022231417776672600264610ustar00rootroot00000000000000package cgrouputil import ( "fmt" "reflect" "strings" "github.com/moby/sys/mountinfo" ) // mountinfoFSType returns m.Fstype on mountinfo v0.1.3, // returns m.FSType on mountinfo v0.4.0. func mountinfoFSType(m *mountinfo.Info) (string, bool) { elem := reflect.ValueOf(m).Elem() for i := 0; i < elem.NumField(); i++ { typeField := elem.Type().Field(i) name := typeField.Name typ := typeField.Type.String() if strings.ToLower(name) == "fstype" && typ == "string" { value := elem.Field(i).String() return value, true } } return "", false } // mountinfoFSTypeFilter is reimplementation of mountinfo.FSTypeFilter. // Temporary solution for supporting both moby/sys/mountinfo@v0.1.3 and @v0.4.0 . // Will be removed after downstream projects stop using @v0.1.3 . func mountinfoFSTypeFilter(fstype ...string) mountinfo.FilterFunc { return func(m *mountinfo.Info) (bool, bool) { mFSType, ok := mountinfoFSType(m) if !ok { panic(fmt.Errorf("failed to get Fstype (FSType) of %+v", m)) } for _, t := range fstype { if mFSType == t { return false, false // don't skeep, keep going } } return true, false // skip, keep going } } rootlesskit-0.14.6/pkg/parent/idtools/000077500000000000000000000000001417776672600177425ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/parent/idtools/idtools.go000066400000000000000000000155171417776672600217570ustar00rootroot00000000000000// Package idtools is forked from https://github.com/moby/moby/tree/298ba5b13150bfffe8414922a951a7a793276d31/pkg/idtools package idtools import ( "bufio" "fmt" "os" "strconv" "strings" ) // IDMap contains a single entry for user namespace range remapping. An array // of IDMap entries represents the structure that will be provided to the Linux // kernel for creating a user namespace. 
type IDMap struct { ContainerID int `json:"container_id"` HostID int `json:"host_id"` Size int `json:"size"` } type subIDRange struct { Start int Length int } type ranges []subIDRange func (e ranges) Len() int { return len(e) } func (e ranges) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e ranges) Less(i, j int) bool { return e[i].Start < e[j].Start } const ( subuidFileName = "/etc/subuid" subgidFileName = "/etc/subgid" ) // GetRootUIDGID retrieves the remapped root uid/gid pair from the set of maps. // If the maps are empty, then the root uid/gid will default to "real" 0/0 func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) { uid, err := toHost(0, uidMap) if err != nil { return -1, -1, err } gid, err := toHost(0, gidMap) if err != nil { return -1, -1, err } return uid, gid, nil } // toContainer takes an id mapping, and uses it to translate a // host ID to the remapped ID. If no map is provided, then the translation // assumes a 1-to-1 mapping and returns the passed in id func toContainer(hostID int, idMap []IDMap) (int, error) { if idMap == nil { return hostID, nil } for _, m := range idMap { if (hostID >= m.HostID) && (hostID <= (m.HostID + m.Size - 1)) { contID := m.ContainerID + (hostID - m.HostID) return contID, nil } } return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID) } // toHost takes an id mapping and a remapped ID, and translates the // ID to the mapped host ID. 
If no map is provided, then the translation // assumes a 1-to-1 mapping and returns the passed in id # func toHost(contID int, idMap []IDMap) (int, error) { if idMap == nil { return contID, nil } for _, m := range idMap { if (contID >= m.ContainerID) && (contID <= (m.ContainerID + m.Size - 1)) { hostID := m.HostID + (contID - m.ContainerID) return hostID, nil } } return -1, fmt.Errorf("Container ID %d cannot be mapped to a host ID", contID) } // Identity is either a UID and GID pair or a SID (but not both) type Identity struct { UID int GID int SID string } // IdentityMapping contains a mappings of UIDs and GIDs type IdentityMapping struct { uids []IDMap gids []IDMap } // NewIdentityMapping takes a requested user and group name and // using the data from /etc/sub{uid,gid} ranges, creates the // proper uid and gid remapping ranges for that user/group pair func NewIdentityMapping(uid int, username string) (*IdentityMapping, error) { subuidRanges, err := parseSubuid(uid, username) if err != nil { return nil, err } subgidRanges, err := parseSubgid(uid, username) if err != nil { return nil, err } if len(subuidRanges) == 0 { return nil, fmt.Errorf("No subuid ranges found for user %d (%q)", uid, username) } if len(subgidRanges) == 0 { return nil, fmt.Errorf("No subgid ranges found for user %d (%q)", uid, username) } return &IdentityMapping{ uids: createIDMap(subuidRanges), gids: createIDMap(subgidRanges), }, nil } // NewIDMappingsFromMaps creates a new mapping from two slices // Deprecated: this is a temporary shim while transitioning to IDMapping func NewIDMappingsFromMaps(uids []IDMap, gids []IDMap) *IdentityMapping { return &IdentityMapping{uids: uids, gids: gids} } // RootPair returns a uid and gid pair for the root user. The error is ignored // because a root user always exists, and the defaults are correct when the uid // and gid maps are empty. 
func (i *IdentityMapping) RootPair() Identity { uid, gid, _ := GetRootUIDGID(i.uids, i.gids) return Identity{UID: uid, GID: gid} } // ToHost returns the host UID and GID for the container uid, gid. // Remapping is only performed if the ids aren't already the remapped root ids func (i *IdentityMapping) ToHost(pair Identity) (Identity, error) { var err error target := i.RootPair() if pair.UID != target.UID { target.UID, err = toHost(pair.UID, i.uids) if err != nil { return target, err } } if pair.GID != target.GID { target.GID, err = toHost(pair.GID, i.gids) } return target, err } // ToContainer returns the container UID and GID for the host uid and gid func (i *IdentityMapping) ToContainer(pair Identity) (int, int, error) { uid, err := toContainer(pair.UID, i.uids) if err != nil { return -1, -1, err } gid, err := toContainer(pair.GID, i.gids) return uid, gid, err } // Empty returns true if there are no id mappings func (i *IdentityMapping) Empty() bool { return len(i.uids) == 0 && len(i.gids) == 0 } // UIDs return the UID mapping // TODO: remove this once everything has been refactored to use pairs func (i *IdentityMapping) UIDs() []IDMap { return i.uids } // GIDs return the UID mapping // TODO: remove this once everything has been refactored to use pairs func (i *IdentityMapping) GIDs() []IDMap { return i.gids } func createIDMap(subidRanges ranges) []IDMap { idMap := []IDMap{} containerID := 0 for _, idrange := range subidRanges { idMap = append(idMap, IDMap{ ContainerID: containerID, HostID: idrange.Start, Size: idrange.Length, }) containerID = containerID + idrange.Length } return idMap } func parseSubuid(uid int, username string) (ranges, error) { return parseSubidFile(subuidFileName, uid, username) } func parseSubgid(uid int, username string) (ranges, error) { return parseSubidFile(subgidFileName, uid, username) } // parseSubidFile will read the appropriate file (/etc/subuid or /etc/subgid) // and return all found ranges for a specified user. 
username is optional. func parseSubidFile(path string, uid int, username string) (ranges, error) { uidS := strconv.Itoa(uid) var rangeList ranges subidFile, err := os.Open(path) if err != nil { return rangeList, err } defer subidFile.Close() s := bufio.NewScanner(subidFile) for s.Scan() { text := strings.TrimSpace(s.Text()) if text == "" || strings.HasPrefix(text, "#") { continue } parts := strings.Split(text, ":") if len(parts) != 3 { return rangeList, fmt.Errorf("Cannot parse subuid/gid information: Format not correct for %s file", path) } if parts[0] == uidS || (username != "" && parts[0] == username) { startid, err := strconv.Atoi(parts[1]) if err != nil { return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err) } length, err := strconv.Atoi(parts[2]) if err != nil { return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err) } rangeList = append(rangeList, subIDRange{startid, length}) } } return rangeList, s.Err() } rootlesskit-0.14.6/pkg/parent/parent.go000066400000000000000000000252371417776672600201160ustar00rootroot00000000000000package parent import ( "context" "errors" "fmt" "net" "net/http" "os" "os/exec" "os/user" "path/filepath" "strconv" "syscall" "github.com/gofrs/flock" "github.com/gorilla/mux" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/api/router" "github.com/rootless-containers/rootlesskit/pkg/common" "github.com/rootless-containers/rootlesskit/pkg/msgutil" "github.com/rootless-containers/rootlesskit/pkg/network" "github.com/rootless-containers/rootlesskit/pkg/parent/cgrouputil" "github.com/rootless-containers/rootlesskit/pkg/parent/idtools" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/sigproxy" "github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal" ) type Opt struct { PipeFDEnvKey string // needs to be set StateDir 
string // directory needs to be precreated StateDirEnvKey string // optional env key to propagate StateDir value NetworkDriver network.ParentDriver // nil for HostNetwork PortDriver port.ParentDriver // nil for --port-driver=none PublishPorts []port.Spec CreatePIDNS bool CreateCgroupNS bool CreateUTSNS bool CreateIPCNS bool ParentEUIDEnvKey string // optional env key to propagate geteuid() value ParentEGIDEnvKey string // optional env key to propagate getegid() value Propagation string EvacuateCgroup2 string // e.g. "rootlesskit_evacuation" } // Documented state files. Undocumented ones are subject to change. const ( StateFileLock = "lock" StateFileChildPID = "child_pid" // decimal pid number text StateFileAPISock = "api.sock" // REST API Socket ) func checkPreflight(opt Opt) error { if opt.PipeFDEnvKey == "" { return errors.New("pipe FD env key is not set") } if opt.StateDir == "" { return errors.New("state dir is not set") } if !filepath.IsAbs(opt.StateDir) { return errors.New("state dir must be absolute") } if stat, err := os.Stat(opt.StateDir); err != nil || !stat.IsDir() { return fmt.Errorf("state dir is inaccessible: %w", err) } if os.Geteuid() == 0 { logrus.Warn("Running RootlessKit as the root user is unsupported.") } warnSysctl() // invalid propagation doesn't result in an error warnPropagation(opt.Propagation) return nil } // createCleanupLock uses LOCK_SH for preventing automatic cleanup of // "/tmp/" caused by by systemd. // // This LOCK_SH lock is different from our lock file in the state dir. // We could unify the lock file into LOCK_SH, but we are still keeping // the lock file for a historical reason. 
// // See: // - https://github.com/rootless-containers/rootlesskit/issues/185 // - https://github.com/rootless-containers/rootlesskit/pull/188 func createCleanupLock(sDir string) error { //lock state dir when using /tmp/ path stateDir, err := os.Open(sDir) if err != nil { return err } err = unix.Flock(int(stateDir.Fd()), unix.LOCK_SH) if err != nil { logrus.Warnf("Failed to lock the state dir %s", sDir) } return nil } // LockStateDir creates and locks "lock" file in the state dir. func LockStateDir(stateDir string) (*flock.Flock, error) { lockPath := filepath.Join(stateDir, StateFileLock) lock := flock.New(lockPath) locked, err := lock.TryLock() if err != nil { return nil, fmt.Errorf("failed to lock %s: %w", lockPath, err) } if !locked { return nil, fmt.Errorf("failed to lock %s, another RootlessKit is running with the same state directory?", lockPath) } return lock, nil } func Parent(opt Opt) error { if err := checkPreflight(opt); err != nil { return err } err := createCleanupLock(opt.StateDir) if err != nil { return err } lock, err := LockStateDir(opt.StateDir) if err != nil { return err } defer os.RemoveAll(opt.StateDir) defer lock.Unlock() pipeR, pipeW, err := os.Pipe() if err != nil { return err } cmd := exec.Command("/proc/self/exe", os.Args[1:]...) 
cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, } if opt.NetworkDriver != nil { cmd.SysProcAttr.Unshareflags |= syscall.CLONE_NEWNET } if opt.CreatePIDNS { // cannot be Unshareflags (panics) cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWPID } if opt.CreateCgroupNS { cmd.SysProcAttr.Unshareflags |= unix.CLONE_NEWCGROUP } if opt.CreateUTSNS { cmd.SysProcAttr.Unshareflags |= unix.CLONE_NEWUTS } if opt.CreateIPCNS { cmd.SysProcAttr.Unshareflags |= unix.CLONE_NEWIPC } cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.ExtraFiles = []*os.File{pipeR} cmd.Env = append(os.Environ(), opt.PipeFDEnvKey+"=3") if opt.StateDirEnvKey != "" { cmd.Env = append(cmd.Env, opt.StateDirEnvKey+"="+opt.StateDir) } if opt.ParentEUIDEnvKey != "" { cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%d", opt.ParentEUIDEnvKey, os.Geteuid())) } if opt.ParentEGIDEnvKey != "" { cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%d", opt.ParentEGIDEnvKey, os.Getegid())) } if err := cmd.Start(); err != nil { return fmt.Errorf("failed to start the child: %w", err) } if err := setupUIDGIDMap(cmd.Process.Pid); err != nil { return fmt.Errorf("failed to setup UID/GID map: %w", err) } sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid) defer signal.StopCatch(sigc) if opt.EvacuateCgroup2 != "" { if err := cgrouputil.EvacuateCgroup2(opt.EvacuateCgroup2); err != nil { return err } } // send message 0 msg := common.Message{ Stage: 0, Message0: common.Message0{}, } if _, err := msgutil.MarshalToWriter(pipeW, &msg); err != nil { return err } // configure Network driver msg = common.Message{ Stage: 1, Message1: common.Message1{ StateDir: opt.StateDir, }, } if opt.NetworkDriver != nil { netMsg, cleanupNetwork, err := opt.NetworkDriver.ConfigureNetwork(cmd.Process.Pid, opt.StateDir) if cleanupNetwork != nil { defer cleanupNetwork() } if err != nil { return fmt.Errorf("failed to setup network %+v: %w", opt.NetworkDriver, 
err) } msg.Message1.Network = *netMsg } // configure Port driver portDriverInitComplete := make(chan struct{}) portDriverQuit := make(chan struct{}) portDriverErr := make(chan error) if opt.PortDriver != nil { msg.Message1.Port.Opaque = opt.PortDriver.OpaqueForChild() cctx := &port.ChildContext{ PID: cmd.Process.Pid, IP: net.ParseIP(msg.Network.IP).To4(), } go func() { portDriverErr <- opt.PortDriver.RunParentDriver(portDriverInitComplete, portDriverQuit, cctx) }() } // send message 1 if _, err := msgutil.MarshalToWriter(pipeW, &msg); err != nil { return err } if err := pipeW.Close(); err != nil { return err } if opt.PortDriver != nil { // wait for port driver to be ready select { case <-portDriverInitComplete: case err = <-portDriverErr: return err } // publish ports for _, p := range opt.PublishPorts { st, err := opt.PortDriver.AddPort(context.TODO(), p) if err != nil { return fmt.Errorf("failed to expose port %v: %w", p, err) } logrus.Debugf("published port %v", st) } } // after child is fully configured, write PID to child_pid file childPIDPath := filepath.Join(opt.StateDir, StateFileChildPID) if err := os.WriteFile(childPIDPath, []byte(strconv.Itoa(cmd.Process.Pid)), 0444); err != nil { return fmt.Errorf("failed to write the child PID %d to %s: %w", cmd.Process.Pid, childPIDPath, err) } // listens the API apiSockPath := filepath.Join(opt.StateDir, StateFileAPISock) apiCloser, err := listenServeAPI(apiSockPath, &router.Backend{ StateDir: opt.StateDir, ChildPID: cmd.Process.Pid, NetworkDriver: opt.NetworkDriver, PortDriver: opt.PortDriver, }) if err != nil { return err } // block until the child exits if err := cmd.Wait(); err != nil { return fmt.Errorf("child exited: %w", err) } // close the API socket if err := apiCloser.Close(); err != nil { return fmt.Errorf("failed to close %s: %w", apiSockPath, err) } // shut down port driver if opt.PortDriver != nil { portDriverQuit <- struct{}{} err = <-portDriverErr } return err } func newugidmapArgs() ([]string, 
[]string, error) { u, err := user.Current() if err != nil { return nil, nil, err } uidMap := []string{ "0", u.Uid, "1", } gidMap := []string{ "0", u.Gid, "1", } uid, err := strconv.Atoi(u.Uid) if err != nil { return nil, nil, err } ims, err := idtools.NewIdentityMapping(uid, u.Username) if err != nil { return nil, nil, err } uidMapLast := 1 for _, im := range ims.UIDs() { uidMap = append(uidMap, []string{ strconv.Itoa(uidMapLast), strconv.Itoa(im.HostID), strconv.Itoa(im.Size), }...) uidMapLast += im.Size } gidMapLast := 1 for _, im := range ims.GIDs() { gidMap = append(gidMap, []string{ strconv.Itoa(gidMapLast), strconv.Itoa(im.HostID), strconv.Itoa(im.Size), }...) gidMapLast += im.Size } return uidMap, gidMap, nil } func setupUIDGIDMap(pid int) error { uArgs, gArgs, err := newugidmapArgs() if err != nil { return fmt.Errorf("failed to compute uid/gid map: %w", err) } pidS := strconv.Itoa(pid) cmd := exec.Command("newuidmap", append([]string{pidS}, uArgs...)...) out, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("newuidmap %s %v failed: %s: %w", pidS, uArgs, string(out), err) } cmd = exec.Command("newgidmap", append([]string{pidS}, gArgs...)...) out, err = cmd.CombinedOutput() if err != nil { return fmt.Errorf("newgidmap %s %v failed: %s: %w", pidS, gArgs, string(out), err) } return nil } // apiCloser is implemented by *http.Server type apiCloser interface { Close() error Shutdown(context.Context) error } func listenServeAPI(socketPath string, backend *router.Backend) (apiCloser, error) { r := mux.NewRouter() router.AddRoutes(r, backend) srv := &http.Server{Handler: r} err := os.RemoveAll(socketPath) if err != nil { return nil, err } l, err := net.Listen("unix", socketPath) if err != nil { return nil, err } go srv.Serve(l) return srv, nil } // InitStateDir removes everything in the state dir except the lock file. // This is needed because when the previous execution crashed, the state dir may not be removed successfully. 
// // InitStateDir must be called before calling parent functions. func InitStateDir(stateDir string) error { if err := os.MkdirAll(stateDir, 0755); err != nil { return err } lk, err := LockStateDir(stateDir) if err != nil { return err } defer lk.Unlock() stateDirStuffs, err := os.ReadDir(stateDir) if err != nil { return err } for _, f := range stateDirStuffs { if f.Name() == StateFileLock { continue } p := filepath.Join(stateDir, f.Name()) if err := os.RemoveAll(p); err != nil { return fmt.Errorf("failed to remove %s: %w", p, err) } } return nil } rootlesskit-0.14.6/pkg/parent/parent_test.go000066400000000000000000000010121417776672600211360ustar00rootroot00000000000000package parent import ( "os" "testing" "golang.org/x/sys/unix" ) func TestBSDLockFileCreated(t *testing.T) { tmpDir, err := os.MkdirTemp("", "rootlesskit") if err != nil { t.Fatalf("expected no error, got %q", err) } err = createCleanupLock(tmpDir) if err != nil { t.Fatalf("expected no error, got %q", err) } stateDir, _ := os.Open(tmpDir) err = unix.Flock(int(stateDir.Fd()), unix.LOCK_EX|unix.LOCK_NB) if err == nil { t.Fatal("expected that there was an error because of existing LOCK_SH") } } rootlesskit-0.14.6/pkg/parent/warn.go000066400000000000000000000041521417776672600175650ustar00rootroot00000000000000package parent import ( "os" "strconv" "strings" "github.com/moby/sys/mountinfo" "github.com/sirupsen/logrus" ) func warnPropagation(propagation string) { mounts, err := mountinfo.GetMounts(mountinfo.SingleEntryFilter("/")) if err != nil || len(mounts) < 1 { logrus.WithError(err).Warn("Failed to parse mountinfo") return } root := mounts[0] // 1. When running on a "sane" host, root.Optional is like "shared:1". ("shared" in findmnt(8) output) // 2. When running inside a container, root.Optional is like "master:363". ("private, slave" in findmnt(8) output) // // Setting non-private propagation is supported for 1, unsupported for 2. 
if !strings.Contains(propagation, "private") && !strings.Contains(root.Optional, "shared") { logrus.Warnf("The host root filesystem is mounted as %q. Setting child propagation to %q is not supported.", root.Optional, propagation) } } // warnSysctl verifies /proc/sys/kernel/unprivileged_userns_clone and /proc/sys/user/max_user_namespaces func warnSysctl() { uuc, err := os.ReadFile("/proc/sys/kernel/unprivileged_userns_clone") // The file exists only on distros with the "add sysctl to disallow unprivileged CLONE_NEWUSER by default" patch. // (e.g. Debian and Arch) if err == nil { s := strings.TrimSpace(string(uuc)) i, err := strconv.ParseInt(s, 10, 64) if err != nil { logrus.WithError(err).Warnf("Failed to parse /proc/sys/kernel/unprivileged_userns_clone (%q)", s) } else if i == 0 { logrus.Warn("/proc/sys/kernel/unprivileged_userns_clone needs to be set to 1.") } } mun, err := os.ReadFile("/proc/sys/user/max_user_namespaces") if err == nil { s := strings.TrimSpace(string(mun)) i, err := strconv.ParseInt(strings.TrimSpace(string(mun)), 10, 64) if err != nil { logrus.WithError(err).Warnf("Failed to parse /proc/sys/user/max_user_namespaces (%q)", s) } else if i == 0 { logrus.Warn("/proc/sys/user/max_user_namespaces needs to be set to non-zero.") } else { threshold := int64(1024) if i < threshold { logrus.Warnf("/proc/sys/user/max_user_namespaces=%d may be low. 
Consider setting to >= %d.", i, threshold) } } } } rootlesskit-0.14.6/pkg/port/000077500000000000000000000000001417776672600157605ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/000077500000000000000000000000001417776672600174265ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/builtin.go000066400000000000000000000007011417776672600214210ustar00rootroot00000000000000package builtin import ( "io" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/child" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent" ) var ( NewParentDriver func(logWriter io.Writer, stateDir string) (port.ParentDriver, error) = parent.NewDriver NewChildDriver func(logWriter io.Writer) port.ChildDriver = child.NewDriver ) rootlesskit-0.14.6/pkg/port/builtin/builtin_test.go000066400000000000000000000011201417776672600224540ustar00rootroot00000000000000package builtin import ( "os" "testing" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/testsuite" ) func TestMain(m *testing.M) { cf := func() port.ChildDriver { return NewChildDriver(os.Stderr) } testsuite.Main(m, cf) } func TestBuiltIn(t *testing.T) { tmpDir, err := os.MkdirTemp("", "test-builtin") if err != nil { t.Fatal(err) } defer os.RemoveAll(tmpDir) d, err := NewParentDriver(os.Stderr, tmpDir) if err != nil { t.Fatal(err) } pf := func() port.ParentDriver { return d } testsuite.Run(t, pf) } rootlesskit-0.14.6/pkg/port/builtin/child/000077500000000000000000000000001417776672600205115ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/child/child.go000066400000000000000000000066471417776672600221400ustar00rootroot00000000000000package child import ( "errors" "fmt" "io" "net" "os" "strconv" "strings" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/msgutil" "github.com/rootless-containers/rootlesskit/pkg/port" 
"github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg" opaquepkg "github.com/rootless-containers/rootlesskit/pkg/port/builtin/opaque" ) func NewDriver(logWriter io.Writer) port.ChildDriver { return &childDriver{ logWriter: logWriter, } } type childDriver struct { logWriter io.Writer } func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}) error { socketPath := opaque[opaquepkg.SocketPath] if socketPath == "" { return errors.New("socket path not set") } childReadyPipePath := opaque[opaquepkg.ChildReadyPipePath] if childReadyPipePath == "" { return errors.New("child ready pipe path not set") } childReadyPipeW, err := os.OpenFile(childReadyPipePath, os.O_WRONLY, os.ModeNamedPipe) if err != nil { return err } ln, err := net.ListenUnix("unix", &net.UnixAddr{ Name: socketPath, Net: "unix", }) if err != nil { return err } // write nothing, just close if err = childReadyPipeW.Close(); err != nil { return err } stopAccept := make(chan struct{}, 1) go func() { <-quit stopAccept <- struct{}{} ln.Close() }() for { c, err := ln.AcceptUnix() if err != nil { select { case <-stopAccept: return nil default: } return err } go func() { if rerr := d.routine(c); rerr != nil { rep := msg.Reply{ Error: rerr.Error(), } msgutil.MarshalToWriter(c, &rep) } c.Close() }() } } func (d *childDriver) routine(c *net.UnixConn) error { var req msg.Request if _, err := msgutil.UnmarshalFromReader(c, &req); err != nil { return err } switch req.Type { case msg.RequestTypeInit: return d.handleConnectInit(c, &req) case msg.RequestTypeConnect: return d.handleConnectRequest(c, &req) default: return fmt.Errorf("unknown request type %q", req.Type) } } func (d *childDriver) handleConnectInit(c *net.UnixConn, req *msg.Request) error { _, err := msgutil.MarshalToWriter(c, nil) return err } func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) error { switch req.Proto { case "tcp": case "tcp4": case "tcp6": case "udp": case "udp4": case "udp6": 
default: return fmt.Errorf("unknown proto: %q", req.Proto) } // dialProto does not need "4", "6" suffix dialProto := strings.TrimSuffix(strings.TrimSuffix(req.Proto, "6"), "4") var dialer net.Dialer ip := req.IP if ip == "" { ip = "127.0.0.1" } else { p := net.ParseIP(ip) if p == nil { return fmt.Errorf("invalid IP: %q", ip) } ip = p.String() } targetConn, err := dialer.Dial(dialProto, net.JoinHostPort(ip, strconv.Itoa(req.Port))) if err != nil { return err } defer targetConn.Close() // no effect on duplicated FD targetConnFiler, ok := targetConn.(filer) if !ok { return fmt.Errorf("unknown target connection: %+v", targetConn) } targetConnFile, err := targetConnFiler.File() if err != nil { return err } defer targetConnFile.Close() oob := unix.UnixRights(int(targetConnFile.Fd())) f, err := c.File() if err != nil { return err } defer f.Close() for { err = unix.Sendmsg(int(f.Fd()), []byte("dummy"), oob, nil, 0) if err != unix.EINTR { break } } return err } // filer is implemented by *net.TCPConn and *net.UDPConn type filer interface { File() (f *os.File, err error) } rootlesskit-0.14.6/pkg/port/builtin/msg/000077500000000000000000000000001417776672600202145ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/msg/msg.go000066400000000000000000000060421417776672600213330ustar00rootroot00000000000000package msg import ( "errors" "fmt" "net" "time" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/msgutil" "github.com/rootless-containers/rootlesskit/pkg/port" ) const ( RequestTypeInit = "init" RequestTypeConnect = "connect" ) // Request and Response are encoded as JSON with uint32le length header. type Request struct { Type string // "init" or "connect" Proto string // "tcp", "tcp4", "tcp6", "udp", "udp4", "udp6" IP string Port int } // Reply may contain FD as OOB type Reply struct { Error string } // Initiate sends "init" request to the child UNIX socket. 
func Initiate(c *net.UnixConn) error { req := Request{ Type: RequestTypeInit, } if _, err := msgutil.MarshalToWriter(c, &req); err != nil { return err } if err := c.CloseWrite(); err != nil { return err } var rep Reply if _, err := msgutil.UnmarshalFromReader(c, &rep); err != nil { return err } return c.CloseRead() } // ConnectToChild connects to the child UNIX socket, and obtains TCP or UDP socket FD // that corresponds to the port spec. func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) { req := Request{ Type: RequestTypeConnect, Proto: spec.Proto, Port: spec.ChildPort, IP: spec.ChildIP, } if _, err := msgutil.MarshalToWriter(c, &req); err != nil { return 0, err } if err := c.CloseWrite(); err != nil { return 0, err } oobSpace := unix.CmsgSpace(4) oob := make([]byte, oobSpace) var ( oobN int err error ) for { _, oobN, _, _, err = c.ReadMsgUnix(nil, oob) if err != unix.EINTR { break } } if err != nil { return 0, err } if oobN != oobSpace { return 0, fmt.Errorf("expected OOB space %d, got %d", oobSpace, oobN) } oob = oob[:oobN] fd, err := parseFDFromOOB(oob) if err != nil { return 0, err } if err := c.CloseRead(); err != nil { return 0, err } return fd, nil } // ConnectToChildWithSocketPath wraps ConnectToChild func ConnectToChildWithSocketPath(socketPath string, spec port.Spec) (int, error) { var dialer net.Dialer conn, err := dialer.Dial("unix", socketPath) if err != nil { return 0, err } defer conn.Close() c := conn.(*net.UnixConn) return ConnectToChild(c, spec) } // ConnectToChildWithRetry retries ConnectToChild every (i*5) milliseconds. 
func ConnectToChildWithRetry(socketPath string, spec port.Spec, retries int) (int, error) { for i := 0; i < retries; i++ { fd, err := ConnectToChildWithSocketPath(socketPath, spec) if i == retries-1 && err != nil { return 0, err } if err == nil { return fd, err } // TODO: backoff time.Sleep(time.Duration(i*5) * time.Millisecond) } // NOT REACHED return 0, errors.New("reached max retry") } func parseFDFromOOB(oob []byte) (int, error) { scms, err := unix.ParseSocketControlMessage(oob) if err != nil { return 0, err } if len(scms) != 1 { return 0, fmt.Errorf("unexpected scms: %v", scms) } scm := scms[0] fds, err := unix.ParseUnixRights(&scm) if err != nil { return 0, err } if len(fds) != 1 { return 0, fmt.Errorf("unexpected fds: %v", fds) } return fds[0], nil } rootlesskit-0.14.6/pkg/port/builtin/opaque/000077500000000000000000000000001417776672600207205ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/opaque/opaque.go000066400000000000000000000001631417776672600225410ustar00rootroot00000000000000package opaque const ( SocketPath = "builtin.socketpath" ChildReadyPipePath = "builtin.readypipepath" ) rootlesskit-0.14.6/pkg/port/builtin/parent/000077500000000000000000000000001417776672600207175ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/parent/parent.go000066400000000000000000000144741417776672600225510ustar00rootroot00000000000000package parent import ( "context" "errors" "fmt" "io" "net" "os" "path/filepath" "strconv" "strings" "sync" "syscall" "time" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/opaque" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp" "github.com/rootless-containers/rootlesskit/pkg/port/portutil" ) // NewDriver for builtin driver. 
func NewDriver(logWriter io.Writer, stateDir string) (port.ParentDriver, error) { // TODO: consider using socketpair FD instead of socket file socketPath := filepath.Join(stateDir, ".bp.sock") childReadyPipePath := filepath.Join(stateDir, ".bp-ready.pipe") // remove the path just in case the previous rootlesskit instance crashed if err := os.RemoveAll(childReadyPipePath); err != nil { return nil, fmt.Errorf("cannot remove %s: %w", childReadyPipePath, err) } if err := syscall.Mkfifo(childReadyPipePath, 0600); err != nil { return nil, fmt.Errorf("cannot mkfifo %s: %w", childReadyPipePath, err) } d := driver{ logWriter: logWriter, socketPath: socketPath, childReadyPipePath: childReadyPipePath, ports: make(map[int]*port.Status, 0), stoppers: make(map[int]func(context.Context) error, 0), nextID: 1, } return &d, nil } type driver struct { logWriter io.Writer socketPath string childReadyPipePath string mu sync.Mutex ports map[int]*port.Status stoppers map[int]func(context.Context) error nextID int } func (d *driver) Info(ctx context.Context) (*api.PortDriverInfo, error) { info := &api.PortDriverInfo{ Driver: "builtin", Protos: []string{"tcp", "tcp4", "tcp6", "udp", "udp4", "udp6"}, DisallowLoopbackChildIP: false, } return info, nil } func (d *driver) OpaqueForChild() map[string]string { return map[string]string{ opaque.SocketPath: d.socketPath, opaque.ChildReadyPipePath: d.childReadyPipePath, } } func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, _ *port.ChildContext) error { childReadyPipeR, err := os.OpenFile(d.childReadyPipePath, os.O_RDONLY, os.ModeNamedPipe) if err != nil { return err } if _, err = io.ReadAll(childReadyPipeR); err != nil { return err } childReadyPipeR.Close() var dialer net.Dialer conn, err := dialer.Dial("unix", d.socketPath) if err != nil { return err } err = msg.Initiate(conn.(*net.UnixConn)) conn.Close() if err != nil { return err } initComplete <- struct{}{} <-quit return nil } func isEPERM(err error) bool { k := 
"permission denied" // As of Go 1.14, errors.Is(err, syscall.EPERM) does not seem to work for // "listen tcp 0.0.0.0:80: bind: permission denied" error from net.ListenTCP(). return errors.Is(err, syscall.EPERM) || strings.Contains(err.Error(), k) } // annotateEPERM annotates origErr for human-readability func annotateEPERM(origErr error, spec port.Spec) error { // Read "net.ipv4.ip_unprivileged_port_start" value (typically 1024) // TODO: what for IPv6? // NOTE: sync.Once should not be used here b, e := os.ReadFile("/proc/sys/net/ipv4/ip_unprivileged_port_start") if e != nil { return origErr } start, e := strconv.Atoi(strings.TrimSpace(string(b))) if e != nil { return origErr } if spec.ParentPort >= start { // origErr is unrelated to ip_unprivileged_port_start return origErr } text := fmt.Sprintf("cannot expose privileged port %d, you can add 'net.ipv4.ip_unprivileged_port_start=%d' to /etc/sysctl.conf (currently %d)", spec.ParentPort, spec.ParentPort, start) if filepath.Base(os.Args[0]) == "rootlesskit" { // NOTE: The following sentence is appended only if Args[0] == "rootlesskit", because it does not apply to Podman (as of Podman v1.9). // Podman launches the parent driver in the child user namespace (but in the parent network namespace), which disables the file capability. text += ", or set CAP_NET_BIND_SERVICE on rootlesskit binary" } text += fmt.Sprintf(", or choose a larger port number (>= %d)", start) return fmt.Errorf(text+": %w", origErr) } func (d *driver) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) { d.mu.Lock() err := portutil.ValidatePortSpec(spec, d.ports) d.mu.Unlock() if err != nil { return nil, err } // NOTE: routineStopCh is close-only channel. Do not send any data. 
// See commit 4803f18fae1e39d200d98f09e445a97ccd6f5526 `Revert "port/builtin: RemovePort() block until conn is closed"` routineStopCh := make(chan struct{}) routineStoppedCh := make(chan error) routineStop := func(ctx context.Context) error { close(routineStopCh) select { case stoppedResult, stoppedResultOk := <-routineStoppedCh: if stoppedResultOk { return stoppedResult } return errors.New("routineStoppedCh was closed without sending data?") case <-ctx.Done(): return fmt.Errorf("timed out while waiting for routineStoppedCh after closing routineStopCh: %w", err) } } switch spec.Proto { case "tcp", "tcp4", "tcp6": err = tcp.Run(d.socketPath, spec, routineStopCh, routineStoppedCh, d.logWriter) case "udp", "udp4", "udp6": err = udp.Run(d.socketPath, spec, routineStopCh, routineStoppedCh, d.logWriter) default: // NOTREACHED return nil, errors.New("spec was not validated?") } if err != nil { if isEPERM(err) { err = annotateEPERM(err, spec) } return nil, err } d.mu.Lock() id := d.nextID st := port.Status{ ID: id, Spec: spec, } d.ports[id] = &st d.stoppers[id] = routineStop d.nextID++ d.mu.Unlock() return &st, nil } func (d *driver) ListPorts(ctx context.Context) ([]port.Status, error) { var ports []port.Status d.mu.Lock() for _, p := range d.ports { ports = append(ports, *p) } d.mu.Unlock() return ports, nil } func (d *driver) RemovePort(ctx context.Context, id int) error { d.mu.Lock() defer d.mu.Unlock() stop, ok := d.stoppers[id] if !ok { return fmt.Errorf("unknown id: %d", id) } if _, ok := ctx.Deadline(); !ok { var cancel context.CancelFunc ctx, cancel = context.WithTimeout(ctx, 5*time.Second) defer cancel() } err := stop(ctx) delete(d.stoppers, id) delete(d.ports, id) return err } rootlesskit-0.14.6/pkg/port/builtin/parent/tcp/000077500000000000000000000000001417776672600215055ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/parent/tcp/tcp.go000066400000000000000000000041251417776672600226240ustar00rootroot00000000000000package tcp import ( "fmt" 
"io" "net" "os" "strconv" "sync" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg" ) func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh chan error, logWriter io.Writer) error { ln, err := net.Listen(spec.Proto, net.JoinHostPort(spec.ParentIP, strconv.Itoa(spec.ParentPort))) if err != nil { fmt.Fprintf(logWriter, "listen: %v\n", err) return err } newConns := make(chan net.Conn) go func() { for { c, err := ln.Accept() if err != nil { fmt.Fprintf(logWriter, "accept: %v\n", err) close(newConns) return } newConns <- c } }() go func() { defer func() { stoppedCh <- ln.Close() close(stoppedCh) }() for { select { case c, ok := <-newConns: if !ok { return } go func() { if err := copyConnToChild(c, socketPath, spec, stopCh); err != nil { fmt.Fprintf(logWriter, "copyConnToChild: %v\n", err) return } }() case <-stopCh: return } } }() // no wait return nil } func copyConnToChild(c net.Conn, socketPath string, spec port.Spec, stopCh <-chan struct{}) error { defer c.Close() // get fd from the child as an SCM_RIGHTS cmsg fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10) if err != nil { return err } f := os.NewFile(uintptr(fd), "") defer f.Close() fc, err := net.FileConn(f) if err != nil { return err } defer fc.Close() bicopy(c, fc, stopCh) return nil } // bicopy is based on libnetwork/cmd/proxy/tcp_proxy.go . 
// NOTE: sendfile(2) cannot be used for sockets func bicopy(x, y net.Conn, quit <-chan struct{}) { var wg sync.WaitGroup var broker = func(to, from net.Conn) { io.Copy(to, from) if fromTCP, ok := from.(*net.TCPConn); ok { fromTCP.CloseRead() } if toTCP, ok := to.(*net.TCPConn); ok { toTCP.CloseWrite() } wg.Done() } wg.Add(2) go broker(x, y) go broker(y, x) finish := make(chan struct{}) go func() { wg.Wait() close(finish) }() select { case <-quit: case <-finish: } x.Close() y.Close() <-finish } rootlesskit-0.14.6/pkg/port/builtin/parent/udp/000077500000000000000000000000001417776672600215075ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/parent/udp/udp.go000066400000000000000000000025351417776672600226330ustar00rootroot00000000000000package udp import ( "fmt" "io" "net" "os" "strconv" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg" "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy" ) func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh chan error, logWriter io.Writer) error { addr, err := net.ResolveUDPAddr(spec.Proto, net.JoinHostPort(spec.ParentIP, strconv.Itoa(spec.ParentPort))) if err != nil { return err } c, err := net.ListenUDP(spec.Proto, addr) if err != nil { return err } udpp := &udpproxy.UDPProxy{ LogWriter: logWriter, Listener: c, BackendDial: func() (*net.UDPConn, error) { // get fd from the child as an SCM_RIGHTS cmsg fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10) if err != nil { return nil, err } f := os.NewFile(uintptr(fd), "") defer f.Close() fc, err := net.FileConn(f) if err != nil { return nil, err } uc, ok := fc.(*net.UDPConn) if !ok { return nil, fmt.Errorf("file conn doesn't implement *net.UDPConn: %+v", fc) } return uc, nil }, } go udpp.Run() go func() { for { select { case <-stopCh: // udpp.Close closes ln as well udpp.Close() stoppedCh <- nil close(stoppedCh) return } } }() // no wait 
return nil } rootlesskit-0.14.6/pkg/port/builtin/parent/udp/udpproxy/000077500000000000000000000000001417776672600234015ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go000066400000000000000000000075501417776672600257700ustar00rootroot00000000000000// Package udpproxy is from https://raw.githubusercontent.com/docker/libnetwork/fec6476dfa21380bf8ee4d74048515d968c1ee63/cmd/proxy/udp_proxy.go package udpproxy import ( "encoding/binary" "fmt" "io" "net" "strings" "sync" "syscall" "time" ) const ( // UDPConnTrackTimeout is the timeout used for UDP connection tracking UDPConnTrackTimeout = 90 * time.Second // UDPBufSize is the buffer size for the UDP proxy UDPBufSize = 65507 ) // A net.Addr where the IP is split into two fields so you can use it as a key // in a map: type connTrackKey struct { IPHigh uint64 IPLow uint64 Port int } func newConnTrackKey(addr *net.UDPAddr) *connTrackKey { if len(addr.IP) == net.IPv4len { return &connTrackKey{ IPHigh: 0, IPLow: uint64(binary.BigEndian.Uint32(addr.IP)), Port: addr.Port, } } return &connTrackKey{ IPHigh: binary.BigEndian.Uint64(addr.IP[:8]), IPLow: binary.BigEndian.Uint64(addr.IP[8:]), Port: addr.Port, } } type connTrackMap map[connTrackKey]*net.UDPConn // UDPProxy is proxy for which handles UDP datagrams. // From libnetwork udp_proxy.go . 
type UDPProxy struct { LogWriter io.Writer Listener *net.UDPConn BackendDial func() (*net.UDPConn, error) connTrackTable connTrackMap connTrackLock sync.Mutex } func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) { defer func() { proxy.connTrackLock.Lock() delete(proxy.connTrackTable, *clientKey) proxy.connTrackLock.Unlock() proxyConn.Close() }() readBuf := make([]byte, UDPBufSize) for { proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout)) again: read, err := proxyConn.Read(readBuf) if err != nil { if err, ok := err.(*net.OpError); ok && err.Err == syscall.ECONNREFUSED { // This will happen if the last write failed // (e.g: nothing is actually listening on the // proxied port on the container), ignore it // and continue until UDPConnTrackTimeout // expires: goto again } return } for i := 0; i != read; { written, err := proxy.Listener.WriteToUDP(readBuf[i:read], clientAddr) if err != nil { return } i += written } } } // Run starts forwarding the traffic using UDP. 
func (proxy *UDPProxy) Run() { proxy.connTrackTable = make(connTrackMap) readBuf := make([]byte, UDPBufSize) for { read, from, err := proxy.Listener.ReadFromUDP(readBuf) if err != nil { // NOTE: Apparently ReadFrom doesn't return // ECONNREFUSED like Read do (see comment in // UDPProxy.replyLoop) if !isClosedError(err) { fmt.Fprintf(proxy.LogWriter, "Stopping proxy on udp: %v\n", err) } break } fromKey := newConnTrackKey(from) proxy.connTrackLock.Lock() proxyConn, hit := proxy.connTrackTable[*fromKey] if !hit { proxyConn, err = proxy.BackendDial() if err != nil { fmt.Fprintf(proxy.LogWriter, "Can't proxy a datagram to udp: %v\n", err) proxy.connTrackLock.Unlock() continue } proxy.connTrackTable[*fromKey] = proxyConn go proxy.replyLoop(proxyConn, from, fromKey) } proxy.connTrackLock.Unlock() for i := 0; i != read; { written, err := proxyConn.Write(readBuf[i:read]) if err != nil { fmt.Fprintf(proxy.LogWriter, "Can't proxy a datagram to udp: %v\n", err) break } i += written } } } // Close stops forwarding the traffic. func (proxy *UDPProxy) Close() { proxy.Listener.Close() proxy.connTrackLock.Lock() defer proxy.connTrackLock.Unlock() for _, conn := range proxy.connTrackTable { conn.Close() } } func isClosedError(err error) bool { /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. * See: * http://golang.org/src/pkg/net/net.go * https://code.google.com/p/go/issues/detail?id=4337 * https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ */ return strings.HasSuffix(err.Error(), "use of closed network connection") } rootlesskit-0.14.6/pkg/port/port.go000066400000000000000000000034721417776672600173010ustar00rootroot00000000000000package port import ( "context" "net" "github.com/rootless-containers/rootlesskit/pkg/api" ) type Spec struct { // Proto is one of ["tcp", "tcp4", "tcp6", "udp", "udp4", "udp6"]. // "tcp" may cause listening on both IPv4 and IPv6. (Corresponds to Go's net.Listen .) 
Proto string `json:"proto,omitempty"` ParentIP string `json:"parentIP,omitempty"` // IPv4 or IPv6 address. can be empty (0.0.0.0). ParentPort int `json:"parentPort,omitempty"` ChildPort int `json:"childPort,omitempty"` // ChildIP is an IPv4 or IPv6 address. // Default values: // - builtin driver: 127.0.0.1 // - slirp4netns driver: slirp4netns's child IP, e.g., 10.0.2.100 ChildIP string `json:"childIP,omitempty"` } type Status struct { ID int `json:"id"` Spec Spec `json:"spec"` } // Manager MUST be thread-safe. type Manager interface { AddPort(ctx context.Context, spec Spec) (*Status, error) ListPorts(ctx context.Context) ([]Status, error) RemovePort(ctx context.Context, id int) error } // ChildContext is used for RunParentDriver type ChildContext struct { // PID of the child, can be used for ns-entering to the child namespaces. PID int // IP of the tap device IP net.IP } // ParentDriver is a driver for the parent process. type ParentDriver interface { Manager Info(ctx context.Context) (*api.PortDriverInfo, error) // OpaqueForChild typically consists of socket path // for controlling child from parent OpaqueForChild() map[string]string // RunParentDriver signals initComplete when ParentDriver is ready to // serve as Manager. // RunParentDriver blocks until quit is signaled. // // ChildContext is optional. 
RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, cctx *ChildContext) error } type ChildDriver interface { RunChildDriver(opaque map[string]string, quit <-chan struct{}) error } rootlesskit-0.14.6/pkg/port/portutil/000077500000000000000000000000001417776672600176425ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/portutil/portutil.go000066400000000000000000000105571417776672600220630ustar00rootroot00000000000000package portutil import ( "fmt" "net" "strconv" "strings" "text/scanner" "github.com/rootless-containers/rootlesskit/pkg/port" ) // ParsePortSpec parses a Docker-like representation of PortSpec, but with // support for both "parent IP" and "child IP" (optional); // e.g. "127.0.0.1:8080:80/tcp", or "127.0.0.1:8080:10.0.2.100:80/tcp" // // Format is as follows: // // :[:]:/ // // Note that (child IP being optional) the format can either contain 5 or 4 // components. When using IPv6 IP addresses, addresses must use square brackets // to prevent the colons being mistaken for delimiters. For example: // // [::1]:8080:[::2]:80/udp func ParsePortSpec(portSpec string) (*port.Spec, error) { const ( parentIP = iota parentPort = iota childIP = iota childPort = iota proto = iota ) var ( s scanner.Scanner err error parts = make([]string, 5) index = parentIP delimiter = ':' ) // First get the "proto" and "parent-port" at the end. These parts are // required, whereas "ParentIP" is optional. Removing them first makes // it easier to parse the remaining parts, as otherwise the third part // could be _either_ an IP-address _or_ a Port. 
// Get the proto protoPos := strings.LastIndex(portSpec, "/") if protoPos < 0 { return nil, fmt.Errorf("missing proto in PortSpec string: %q", portSpec) } parts[proto] = portSpec[protoPos+1:] err = validateProto(parts[proto]) if err != nil { return nil, fmt.Errorf("invalid PortSpec string: %q: %w", portSpec, err) } // Get the parent port portPos := strings.LastIndex(portSpec, ":") if portPos < 0 { return nil, fmt.Errorf("unexpected PortSpec string: %q", portSpec) } parts[childPort] = portSpec[portPos+1 : protoPos] // Scan the remainder ":[:]" s.Init(strings.NewReader(portSpec[:portPos])) for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() { if index > childPort { return nil, fmt.Errorf("unexpected PortSpec string: %q", portSpec) } switch tok { case '[': // Start of IPv6 IP-address; value ends at closing bracket (]) delimiter = ']' continue case delimiter: if delimiter == ']' { // End of IPv6 IP-address delimiter = ':' // Skip the next token, which should be a colon delimiter (:) tok = s.Scan() } index++ continue default: parts[index] += s.TokenText() } } if parts[parentIP] != "" && net.ParseIP(parts[parentIP]) == nil { return nil, fmt.Errorf("unexpected ParentIP in PortSpec string: %q", portSpec) } if parts[childIP] != "" && net.ParseIP(parts[childIP]) == nil { return nil, fmt.Errorf("unexpected ParentIP in PortSpec string: %q", portSpec) } ps := &port.Spec{ Proto: parts[proto], ParentIP: parts[parentIP], ChildIP: parts[childIP], } ps.ParentPort, err = strconv.Atoi(parts[parentPort]) if err != nil { return nil, fmt.Errorf("unexpected ChildPort in PortSpec string: %q: %w", portSpec, err) } ps.ChildPort, err = strconv.Atoi(parts[childPort]) if err != nil { return nil, fmt.Errorf("unexpected ParentPort in PortSpec string: %q: %w", portSpec, err) } return ps, nil } // ValidatePortSpec validates *port.Spec. // existingPorts can be optionally passed for detecting conflicts. 
func ValidatePortSpec(spec port.Spec, existingPorts map[int]*port.Status) error { if err := validateProto(spec.Proto); err != nil { return err } if spec.ParentIP != "" { if net.ParseIP(spec.ParentIP) == nil { return fmt.Errorf("invalid ParentIP: %q", spec.ParentIP) } } if spec.ChildIP != "" { if net.ParseIP(spec.ChildIP) == nil { return fmt.Errorf("invalid ChildIP: %q", spec.ChildIP) } } if spec.ParentPort <= 0 || spec.ParentPort > 65535 { return fmt.Errorf("invalid ParentPort: %q", spec.ParentPort) } if spec.ChildPort <= 0 || spec.ChildPort > 65535 { return fmt.Errorf("invalid ChildPort: %q", spec.ChildPort) } for id, p := range existingPorts { sp := p.Spec sameProto := sp.Proto == spec.Proto sameParent := sp.ParentIP == spec.ParentIP && sp.ParentPort == spec.ParentPort if sameProto && sameParent { return fmt.Errorf("conflict with ID %d", id) } } return nil } func validateProto(proto string) error { switch proto { case "tcp", "tcp4", "tcp6", "udp", "udp4", "udp6", "sctp", "sctp4", "sctp6": return nil default: return fmt.Errorf("unknown proto: %q", proto) } } rootlesskit-0.14.6/pkg/port/portutil/portutil_test.go000066400000000000000000000115441417776672600231170ustar00rootroot00000000000000package portutil import ( "reflect" "testing" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/stretchr/testify/assert" ) func TestParsePortSpec(t *testing.T) { type testCase struct { s string // nil for invalid string expected *port.Spec } testCases := []testCase{ { s: "127.0.0.1:8080:80/tcp", expected: &port.Spec{ Proto: "tcp", ParentIP: "127.0.0.1", ParentPort: 8080, ChildPort: 80, }, }, { s: "127.0.0.1:8080:80/tcp4", expected: &port.Spec{ Proto: "tcp4", ParentIP: "127.0.0.1", ParentPort: 8080, ChildPort: 80, }, }, { s: "127.0.0.1:8080:10.0.2.100:80/tcp", expected: &port.Spec{ Proto: "tcp", ParentIP: "127.0.0.1", ParentPort: 8080, ChildIP: "10.0.2.100", ChildPort: 80, }, }, { s: "bad", }, { s: "127.0.0.1:8080:80/tcp,127.0.0.1:4040:40/tcp", // one entry per one 
string }, { s: "8080", // future version may support short formats like this }, { s: "[::1]:8080:80/tcp", expected: &port.Spec{ Proto: "tcp", ParentIP: "::1", ParentPort: 8080, ChildPort: 80, }, }, { s: "[::1]:8080:[::2]:80/udp", expected: &port.Spec{ Proto: "udp", ParentIP: "::1", ParentPort: 8080, ChildIP: "::2", ChildPort: 80, }, }, } for _, tc := range testCases { tc := tc t.Run(tc.s, func(t *testing.T) { got, err := ParsePortSpec(tc.s) if tc.expected == nil { if err == nil { t.Fatalf("error is expected for %q", tc.s) } } else { if err != nil { t.Fatalf("got error for %q: %v", tc.s, err) } if !reflect.DeepEqual(got, tc.expected) { t.Fatalf("expected %+v, got %+v", tc.expected, got) } } }) } } func TestValidatePortSpec(t *testing.T) { existingPorts := make(map[int]*port.Status) // bind to all host IPs existingPorts[1] = &port.Status{ ID: 1, Spec: port.Spec{ Proto: "tcp", ParentIP: "", ParentPort: 80, ChildPort: 80, }, } // bind to only host IP 10.10.10.10 existingPorts[2] = &port.Status{ ID: 2, Spec: port.Spec{ Proto: "tcp", ParentIP: "10.10.10.10", ParentPort: 8080, ChildPort: 8080, }, } // avoid typing the spec over and over for small changes spec := port.Spec{ Proto: "tcp", ParentIP: "127.0.0.1", ParentPort: 1001, ChildPort: 1001, } // proto must be supplied and must equal "udp" or "tcp" invalidProtos := []string{"", "NaN", "TCP"} validProtos := []string{"udp", "tcp", "sctp"} for _, p := range invalidProtos { s := spec s.Proto = p err := ValidatePortSpec(s, existingPorts) assert.Error(t, err) } for _, p := range validProtos { s := spec s.Proto = p err := ValidatePortSpec(s, existingPorts) assert.NoError(t, err) } s := port.Spec{Proto: "tcp", ParentIP: "invalid", ParentPort: 80, ChildPort: 80} assert.Error(t, ValidatePortSpec(s, existingPorts)) s = port.Spec{Proto: "tcp", ParentPort: 80, ChildIP: "invalid", ChildPort: 80} assert.Error(t, ValidatePortSpec(s, existingPorts)) invalidPorts := []int{-200, 0, 1000000} validPorts := []int{20, 500, 1337, 65000} // 0 < 
parentPort <= 65535 for _, p := range invalidPorts { s := spec s.ParentPort = p err := ValidatePortSpec(s, existingPorts) assert.Error(t, err) } for _, p := range validPorts { s := spec s.ParentPort = p err := ValidatePortSpec(s, existingPorts) assert.NoError(t, err) } // 0 < childPort <= 65535 for _, p := range invalidPorts { s := spec s.ChildPort = p err := ValidatePortSpec(s, existingPorts) assert.Error(t, err, "invalid ChildPort") } for _, p := range validPorts { s := spec s.ChildPort = p err := ValidatePortSpec(s, existingPorts) assert.NoError(t, err) } // ChildPorts can overlap so long as parent port/IPs don't // existing ports include tcp 10.10.10.10:8080, tcp *:80, no udp // udp doesn't conflict with tcp s = port.Spec{Proto: "udp", ParentPort: 80, ChildPort: 80} assert.NoError(t, ValidatePortSpec(s, existingPorts)) // same parent, same child, different IP has no conflict s = port.Spec{Proto: "tcp", ParentIP: "10.10.10.11", ParentPort: 8080, ChildPort: 8080} assert.NoError(t, ValidatePortSpec(s, existingPorts)) // same IP different parentPort, same child port has no conflict s = port.Spec{Proto: "tcp", ParentIP: "10.10.10.10", ParentPort: 8081, ChildPort: 8080} assert.NoError(t, ValidatePortSpec(s, existingPorts)) // Same parent IP and Port should conflict, even if child port different // conflict with ID 1: s = port.Spec{Proto: "tcp", ParentPort: 80, ChildPort: 90} err := ValidatePortSpec(s, existingPorts) assert.EqualError(t, err, "conflict with ID 1") // conflict with ID 2 s = port.Spec{Proto: "tcp", ParentIP: "10.10.10.10", ParentPort: 8080, ChildPort: 8080} err = ValidatePortSpec(s, existingPorts) assert.EqualError(t, err, "conflict with ID 2") } rootlesskit-0.14.6/pkg/port/slirp4netns/000077500000000000000000000000001417776672600202455ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/slirp4netns/slirp4netns.go000066400000000000000000000110471417776672600230640ustar00rootroot00000000000000package slirp4netns import ( "context" "encoding/json" 
"errors" "fmt" "io" "net" "strings" "sync" "github.com/rootless-containers/rootlesskit/pkg/api" "github.com/rootless-containers/rootlesskit/pkg/port" "github.com/rootless-containers/rootlesskit/pkg/port/portutil" ) func NewParentDriver(logWriter io.Writer, apiSocketPath string) (port.ParentDriver, error) { if apiSocketPath == "" { return nil, errors.New("api socket path is not set") } d := driver{ logWriter: logWriter, ports: make(map[int]*port.Status, 0), apiSocketPath: apiSocketPath, } return &d, nil } type driver struct { logWriter io.Writer apiSocketPath string mu sync.Mutex childIP string // can be empty ports map[int]*port.Status } func (d *driver) Info(ctx context.Context) (*api.PortDriverInfo, error) { info := &api.PortDriverInfo{ Driver: "slirp4netns", // No IPv6 support yet Protos: []string{"tcp", "tcp4", "udp", "udp4"}, DisallowLoopbackChildIP: true, } return info, nil } func (d *driver) OpaqueForChild() map[string]string { // NOP, as this driver does not have child-side logic. 
return nil } func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, cctx *port.ChildContext) error { if cctx != nil && cctx.IP != nil && cctx.IP.To4() != nil { d.childIP = cctx.IP.To4().String() } initComplete <- struct{}{} <-quit return nil } func (d *driver) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) { d.mu.Lock() defer d.mu.Unlock() err := portutil.ValidatePortSpec(spec, d.ports) if err != nil { return nil, err } if strings.HasSuffix(spec.Proto, "6") { return nil, fmt.Errorf("unsupported protocol %q", spec.Proto) } proto := strings.TrimSuffix(spec.Proto, "4") ip := spec.ChildIP if ip == "" { ip = d.childIP } else { p := net.ParseIP(ip) if p == nil { return nil, fmt.Errorf("invalid IP: %q", ip) } p = p.To4() if p == nil { return nil, fmt.Errorf("unsupported IP (v6?): %s", ip) } ip = p.String() } req := request{ Execute: "add_hostfwd", Arguments: addHostFwdArguments{ Proto: proto, HostAddr: spec.ParentIP, HostPort: spec.ParentPort, GuestAddr: ip, GuestPort: spec.ChildPort, }, } rep, err := callAPI(d.apiSocketPath, req) if err != nil { return nil, err } if len(rep.Error) != 0 { return nil, fmt.Errorf("reply.Error: %+v", rep.Error) } idIntf, ok := rep.Return["id"] if !ok { return nil, fmt.Errorf("unexpected reply: %+v", rep) } idFloat, ok := idIntf.(float64) if !ok { return nil, fmt.Errorf("unexpected id: %+v", idIntf) } id := int(idFloat) st := port.Status{ ID: id, Spec: spec, } d.ports[id] = &st return &st, nil } func (d *driver) ListPorts(ctx context.Context) ([]port.Status, error) { var ports []port.Status d.mu.Lock() for _, p := range d.ports { ports = append(ports, *p) } d.mu.Unlock() return ports, nil } func (d *driver) RemovePort(ctx context.Context, id int) error { d.mu.Lock() defer d.mu.Unlock() req := request{ Execute: "remove_hostfwd", Arguments: removeHostFwdArguments{ ID: id, }, } rep, err := callAPI(d.apiSocketPath, req) if err != nil { return err } if len(rep.Error) != 0 { return 
fmt.Errorf("reply.Error: %v", rep.Error) } delete(d.ports, id) return nil } type addHostFwdArguments struct { Proto string `json:"proto"` HostAddr string `json:"host_addr"` HostPort int `json:"host_port"` GuestAddr string `json:"guest_addr"` GuestPort int `json:"guest_port"` } type removeHostFwdArguments struct { ID int `json:"id"` } type request struct { Execute string `json:"execute"` Arguments interface{} `json:"arguments"` } type reply struct { Return map[string]interface{} `json:"return,omitempty"` Error map[string]interface{} `json:"error,omitempty"` } func callAPI(apiSocketPath string, req request) (*reply, error) { addr := &net.UnixAddr{Net: "unix", Name: apiSocketPath} conn, err := net.DialUnix("unix", nil, addr) if err != nil { return nil, err } defer conn.Close() if err := json.NewEncoder(conn).Encode(req); err != nil { return nil, err } if err := conn.CloseWrite(); err != nil { return nil, err } b, err := io.ReadAll(conn) if err != nil { return nil, err } var rep reply if err := json.Unmarshal(b, &rep); err != nil { return nil, err } return &rep, nil } func NewChildDriver() port.ChildDriver { return &childDriver{} } type childDriver struct { } func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}) error { // NOP <-quit return nil } rootlesskit-0.14.6/pkg/port/testsuite/000077500000000000000000000000001417776672600200115ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/port/testsuite/testsuite.go000066400000000000000000000165141417776672600224000ustar00rootroot00000000000000package testsuite import ( "bytes" "context" "encoding/json" "fmt" "io" "net" "os" "os/exec" "strconv" "strings" "sync" "syscall" "testing" "time" "github.com/rootless-containers/rootlesskit/pkg/port" ) const ( reexecKeyMode = "rootlesskit-port-testsuite.mode" reexecKeyOpaque = "rootlesskit-port-testsuite.opaque" reexecKeyQuitFD = "rootlesskit-port-testsuite.quitfd" ) func Main(m *testing.M, cf func() port.ChildDriver) { switch mode := 
os.Getenv(reexecKeyMode); mode { case "": os.Exit(m.Run()) case "child": default: panic(fmt.Errorf("unknown mode: %q", mode)) } var opaque map[string]string if err := json.Unmarshal([]byte(os.Getenv(reexecKeyOpaque)), &opaque); err != nil { panic(err) } quit := make(chan struct{}) errCh := make(chan error) go func() { d := cf() dErr := d.RunChildDriver(opaque, quit) errCh <- dErr }() quitFD, err := strconv.Atoi(os.Getenv(reexecKeyQuitFD)) if err != nil { panic(err) } quitR := os.NewFile(uintptr(quitFD), "") defer quitR.Close() if _, err = io.ReadAll(quitR); err != nil { panic(err) } quit <- struct{}{} err = <-errCh if err != nil { panic(err) } // when race detector is enabled, it takes about 1s after leaving from Main() } func Run(t *testing.T, pf func() port.ParentDriver) { RunTCP(t, pf) RunTCP4(t, pf) RunUDP(t, pf) RunUDP4(t, pf) } func RunTCP(t *testing.T, pf func() port.ParentDriver) { t.Run("TestTCP", func(t *testing.T) { TestProto(t, "tcp", pf()) }) } func RunTCP4(t *testing.T, pf func() port.ParentDriver) { t.Run("TestTCP4", func(t *testing.T) { TestProto(t, "tcp4", pf()) }) } func RunUDP(t *testing.T, pf func() port.ParentDriver) { t.Run("TestUDP", func(t *testing.T) { TestProto(t, "udp", pf()) }) } func RunUDP4(t *testing.T, pf func() port.ParentDriver) { t.Run("TestUDP4", func(t *testing.T) { TestProto(t, "udp4", pf()) }) } func TestProto(t *testing.T, proto string, d port.ParentDriver) { ensureDeps(t, "nsenter") t.Logf("creating USER+NET namespace") opaque := d.OpaqueForChild() opaqueJSON, err := json.Marshal(opaque) if err != nil { t.Fatal(err) } pr, pw, err := os.Pipe() if err != nil { t.Fatal(err) } cmd := exec.Command("/proc/self/exe") cmd.Stdout = os.Stderr cmd.Stderr = os.Stderr cmd.Env = append([]string{ reexecKeyMode + "=child", reexecKeyOpaque + "=" + string(opaqueJSON), reexecKeyQuitFD + "=3"}, os.Environ()...) 
cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNET, UidMappings: []syscall.SysProcIDMap{ { ContainerID: 0, HostID: os.Geteuid(), Size: 1, }, }, GidMappings: []syscall.SysProcIDMap{ { ContainerID: 0, HostID: os.Getegid(), Size: 1, }, }, } cmd.ExtraFiles = []*os.File{pr} if err := cmd.Start(); err != nil { t.Fatal(err) } defer func() { pw.Close() cmd.Wait() }() childPID := cmd.Process.Pid if out, err := nsenterExec(childPID, "ip", "link", "set", "lo", "up"); err != nil { t.Fatalf("%v, out=%s", err, string(out)) } testProtoWithPID(t, proto, d, childPID) } func testProtoWithPID(t *testing.T, proto string, d port.ParentDriver, childPID int) { ensureDeps(t, "nsenter", "ip", "nc") // [child]parent pairs := map[int]int{ // FIXME: flaky 80: (childPID + 80) % 60000, 8080: (childPID + 8080) % 60000, } if proto == "tcp" { for _, parentPort := range pairs { var d net.Dialer d.Timeout = 50 * time.Millisecond _, err := d.Dial(proto, fmt.Sprintf("127.0.0.1:%d", parentPort)) if err == nil { t.Fatalf("port %d is already used?", parentPort) } } } t.Logf("namespace pid: %d", childPID) initComplete := make(chan struct{}) quit := make(chan struct{}) driverErr := make(chan error) go func() { cctx := &port.ChildContext{ PID: childPID, IP: nil, // we don't have tap device in this test suite } driverErr <- d.RunParentDriver(initComplete, quit, cctx) }() select { case <-initComplete: case err := <-driverErr: t.Fatal(err) } var wg sync.WaitGroup for c, p := range pairs { childP, parentP := c, p wg.Add(1) go func() { testProtoRoutine(t, proto, d, childPID, childP, parentP) wg.Done() }() } wg.Wait() quit <- struct{}{} err := <-driverErr if err != nil { t.Fatal(err) } } func nsenterExec(pid int, cmdss ...string) ([]byte, error) { cmd := exec.Command("nsenter", append([]string{"-U", "--preserve-credential", "-n", "-t", strconv.Itoa(pid)}, cmdss...)...) 
cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } return cmd.CombinedOutput() } // FIXME: support IPv6 func testProtoRoutine(t *testing.T, proto string, d port.ParentDriver, childPID, childP, parentP int) { stdoutR, stdoutW := io.Pipe() var ncFlags []string switch proto { case "tcp", "tcp4": // NOP case "udp", "udp4": ncFlags = append(ncFlags, "-u") default: panic("invalid proto") } cmd := exec.Command("nsenter", append( []string{"-U", "--preserve-credential", "-n", "-t", strconv.Itoa(childPID), "nc"}, append(ncFlags, []string{"-l", strconv.Itoa(childP)}...)...)...) cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } cmd.Stdout = stdoutW cmd.Stderr = os.Stderr if err := cmd.Start(); err != nil { // NOTE: t.Fatal is not thread-safe while t.Error is (see godoc testing) panic(err) } defer cmd.Process.Kill() portStatus, err := d.AddPort(context.TODO(), port.Spec{ Proto: proto, ParentIP: "127.0.0.1", ParentPort: parentP, ChildPort: childP, }) if err != nil { panic(err) } t.Logf("opened port: %+v", portStatus) if proto == "udp" || proto == "udp4" { // Dial does not return an error for UDP even if the port is not exposed yet time.Sleep(1 * time.Second) } var conn net.Conn for i := 0; i < 5; i++ { var dialer net.Dialer conn, err = dialer.Dial(proto, fmt.Sprintf("127.0.0.1:%d", parentP)) if i == 4 && err != nil { panic(err) } if conn != nil && err == nil { break } time.Sleep(time.Duration(i*5) * time.Millisecond) } wBytes := []byte(fmt.Sprintf("test-%s-%d-%d-%d", proto, childPID, childP, parentP)) if _, err := conn.Write(wBytes); err != nil { panic(err) } switch proto { case "tcp", "tcp4": if err := conn.(*net.TCPConn).CloseWrite(); err != nil { panic(err) } case "udp", "udp4": if err := conn.(*net.UDPConn).Close(); err != nil { panic(err) } } rBytes := make([]byte, len(wBytes)) if _, err := stdoutR.Read(rBytes); err != nil { panic(err) } if bytes.Compare(wBytes, rBytes) != 0 { panic(fmt.Errorf("expected %q, got %q", string(wBytes), 
string(rBytes))) } if proto == "tcp" || proto == "tcp4" { if err := conn.Close(); err != nil { panic(err) } if err := cmd.Wait(); err != nil { panic(err) } } else { // nc -u does not exit automatically syscall.Kill(cmd.Process.Pid, syscall.SIGKILL) } if err := d.RemovePort(context.TODO(), portStatus.ID); err != nil { panic(err) } t.Logf("closed port ID %d", portStatus.ID) } func ensureDeps(t testing.TB, deps ...string) { for _, dep := range deps { if _, err := exec.LookPath(dep); err != nil { t.Skipf("%q not found: %v", dep, err) } } } func TLogWriter(t testing.TB, s string) io.Writer { return &tLogWriter{t: t, s: s} } type tLogWriter struct { t testing.TB s string } func (w *tLogWriter) Write(p []byte) (int, error) { w.t.Logf("%s: %s", w.s, strings.TrimSuffix(string(p), "\n")) return len(p), nil } rootlesskit-0.14.6/pkg/sigproxy/000077500000000000000000000000001417776672600166605ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/sigproxy/signal/000077500000000000000000000000001417776672600201355ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/sigproxy/signal/signal.go000066400000000000000000000012201417776672600217340ustar00rootroot00000000000000// Package signal provides helper functions for dealing with signals across // various operating systems. // // Forked from https://github.com/moby/moby/tree/37defbfd9b968f38e8e15dfa5f06d9f878bd65ba/pkg/signal package signal import ( "os" "os/signal" ) // CatchAll catches all signals and relays them to the specified channel. func CatchAll(sigc chan os.Signal) { var handledSigs []os.Signal for _, s := range SignalMap { handledSigs = append(handledSigs, s) } signal.Notify(sigc, handledSigs...) } // StopCatch stops catching the signals and closes the specified channel. 
func StopCatch(sigc chan os.Signal) {
	// Stop delivery first so nothing is sent on the channel after it is closed.
	signal.Stop(sigc)
	close(sigc)
}
rootlesskit-0.14.6/pkg/sigproxy/signal/signal_linux.go000066400000000000000000000037651417776672600231730ustar00rootroot00000000000000
//go:build !mips && !mipsle && !mips64 && !mips64le
// +build !mips,!mipsle,!mips64,!mips64le

package signal

import (
	"syscall"

	"golang.org/x/sys/unix"
)

// Signal numbers that anchor the "RTMIN+n" and "RTMAX-n" entries of SignalMap.
const (
	sigrtmin = 34
	sigrtmax = 64
)

// SignalMap is a map of Linux signals.
// Keys are signal names without the "SIG" prefix.
var SignalMap = map[string]syscall.Signal{
	"ABRT":     unix.SIGABRT,
	"ALRM":     unix.SIGALRM,
	"BUS":      unix.SIGBUS,
	"CHLD":     unix.SIGCHLD,
	"CLD":      unix.SIGCLD,
	"CONT":     unix.SIGCONT,
	"FPE":      unix.SIGFPE,
	"HUP":      unix.SIGHUP,
	"ILL":      unix.SIGILL,
	"INT":      unix.SIGINT,
	"IO":       unix.SIGIO,
	"IOT":      unix.SIGIOT,
	"KILL":     unix.SIGKILL,
	"PIPE":     unix.SIGPIPE,
	"POLL":     unix.SIGPOLL,
	"PROF":     unix.SIGPROF,
	"PWR":      unix.SIGPWR,
	"QUIT":     unix.SIGQUIT,
	"SEGV":     unix.SIGSEGV,
	"STKFLT":   unix.SIGSTKFLT,
	"STOP":     unix.SIGSTOP,
	"SYS":      unix.SIGSYS,
	"TERM":     unix.SIGTERM,
	"TRAP":     unix.SIGTRAP,
	"TSTP":     unix.SIGTSTP,
	"TTIN":     unix.SIGTTIN,
	"TTOU":     unix.SIGTTOU,
	"URG":      unix.SIGURG,
	"USR1":     unix.SIGUSR1,
	"USR2":     unix.SIGUSR2,
	"VTALRM":   unix.SIGVTALRM,
	"WINCH":    unix.SIGWINCH,
	"XCPU":     unix.SIGXCPU,
	"XFSZ":     unix.SIGXFSZ,
	"RTMIN":    sigrtmin,
	"RTMIN+1":  sigrtmin + 1,
	"RTMIN+2":  sigrtmin + 2,
	"RTMIN+3":  sigrtmin + 3,
	"RTMIN+4":  sigrtmin + 4,
	"RTMIN+5":  sigrtmin + 5,
	"RTMIN+6":  sigrtmin + 6,
	"RTMIN+7":  sigrtmin + 7,
	"RTMIN+8":  sigrtmin + 8,
	"RTMIN+9":  sigrtmin + 9,
	"RTMIN+10": sigrtmin + 10,
	"RTMIN+11": sigrtmin + 11,
	"RTMIN+12": sigrtmin + 12,
	"RTMIN+13": sigrtmin + 13,
	"RTMIN+14": sigrtmin + 14,
	"RTMIN+15": sigrtmin + 15,
	"RTMAX-14": sigrtmax - 14,
	"RTMAX-13": sigrtmax - 13,
	"RTMAX-12": sigrtmax - 12,
	"RTMAX-11": sigrtmax - 11,
	"RTMAX-10": sigrtmax - 10,
	"RTMAX-9":  sigrtmax - 9,
	"RTMAX-8":  sigrtmax - 8,
	"RTMAX-7":  sigrtmax - 7,
	"RTMAX-6":  sigrtmax - 6,
	"RTMAX-5":  sigrtmax - 5,
	"RTMAX-4":  sigrtmax - 4,
	"RTMAX-3":  sigrtmax - 3,
	"RTMAX-2":  sigrtmax - 2,
	"RTMAX-1":  sigrtmax - 1,
	"RTMAX":    sigrtmax,
}
rootlesskit-0.14.6/pkg/sigproxy/signal/signal_linux_mipsx.go000066400000000000000000000040061417776672600244000ustar00rootroot00000000000000
//go:build linux && (mips || mipsle || mips64 || mips64le)
// +build linux
// +build mips mipsle mips64 mips64le

package signal

import (
	"syscall"

	"golang.org/x/sys/unix"
)

// Signal numbers that anchor the "RTMIN+n" and "RTMAX-n" entries of SignalMap
// in this MIPS-only build.
const (
	sigrtmin = 34
	sigrtmax = 127
)

// SignalMap is a map of Linux signals.
// Keys are signal names without the "SIG" prefix.
var SignalMap = map[string]syscall.Signal{
	"ABRT":     unix.SIGABRT,
	"ALRM":     unix.SIGALRM,
	"BUS":      unix.SIGBUS,
	"CHLD":     unix.SIGCHLD,
	"CLD":      unix.SIGCLD,
	"CONT":     unix.SIGCONT,
	"FPE":      unix.SIGFPE,
	"HUP":      unix.SIGHUP,
	"ILL":      unix.SIGILL,
	"INT":      unix.SIGINT,
	"IO":       unix.SIGIO,
	"IOT":      unix.SIGIOT,
	"KILL":     unix.SIGKILL,
	"PIPE":     unix.SIGPIPE,
	"POLL":     unix.SIGPOLL,
	"PROF":     unix.SIGPROF,
	"PWR":      unix.SIGPWR,
	"QUIT":     unix.SIGQUIT,
	"SEGV":     unix.SIGSEGV,
	"EMT":      unix.SIGEMT,
	"STOP":     unix.SIGSTOP,
	"SYS":      unix.SIGSYS,
	"TERM":     unix.SIGTERM,
	"TRAP":     unix.SIGTRAP,
	"TSTP":     unix.SIGTSTP,
	"TTIN":     unix.SIGTTIN,
	"TTOU":     unix.SIGTTOU,
	"URG":      unix.SIGURG,
	"USR1":     unix.SIGUSR1,
	"USR2":     unix.SIGUSR2,
	"VTALRM":   unix.SIGVTALRM,
	"WINCH":    unix.SIGWINCH,
	"XCPU":     unix.SIGXCPU,
	"XFSZ":     unix.SIGXFSZ,
	"RTMIN":    sigrtmin,
	"RTMIN+1":  sigrtmin + 1,
	"RTMIN+2":  sigrtmin + 2,
	"RTMIN+3":  sigrtmin + 3,
	"RTMIN+4":  sigrtmin + 4,
	"RTMIN+5":  sigrtmin + 5,
	"RTMIN+6":  sigrtmin + 6,
	"RTMIN+7":  sigrtmin + 7,
	"RTMIN+8":  sigrtmin + 8,
	"RTMIN+9":  sigrtmin + 9,
	"RTMIN+10": sigrtmin + 10,
	"RTMIN+11": sigrtmin + 11,
	"RTMIN+12": sigrtmin + 12,
	"RTMIN+13": sigrtmin + 13,
	"RTMIN+14": sigrtmin + 14,
	"RTMIN+15": sigrtmin + 15,
	"RTMAX-14": sigrtmax - 14,
	"RTMAX-13": sigrtmax - 13,
	"RTMAX-12": sigrtmax - 12,
	"RTMAX-11": sigrtmax - 11,
	"RTMAX-10": sigrtmax - 10,
	"RTMAX-9":  sigrtmax - 9,
	"RTMAX-8":  sigrtmax - 8,
	"RTMAX-7":  sigrtmax - 7,
	"RTMAX-6":  sigrtmax - 6,
	"RTMAX-5":  sigrtmax - 5,
	"RTMAX-4":  sigrtmax - 4,
	"RTMAX-3":  sigrtmax - 3,
	"RTMAX-2":  sigrtmax - 2,
	"RTMAX-1":  sigrtmax - 1,
	"RTMAX":    sigrtmax,
}
rootlesskit-0.14.6/pkg/sigproxy/sigproxy.go000066400000000000000000000014731417776672600211000ustar00rootroot00000000000000package sigproxy import ( "context" "os" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal" ) // ForwardAllSignals forwards signals. // Based on https://github.com/docker/cli/blob/ef2f64abbd37edfa148f745fa0013731b5074d1b/cli/command/container/tty.go#L99-L126 func ForwardAllSignals(ctx context.Context, pid int) chan os.Signal { sigc := make(chan os.Signal, 128) signal.CatchAll(sigc) go func() { for s := range sigc { if s == unix.SIGCHLD || s == unix.SIGPIPE || s == unix.SIGURG { continue } us, ok := s.(unix.Signal) if !ok { logrus.Warnf("Unsupported signal %v", s) continue } if err := unix.Kill(pid, us); err != nil { logrus.WithError(err).Debugf("Error sending signal %v", s) } } }() return sigc } rootlesskit-0.14.6/pkg/version/000077500000000000000000000000001417776672600164615ustar00rootroot00000000000000rootlesskit-0.14.6/pkg/version/version.go000066400000000000000000000000521417776672600204720ustar00rootroot00000000000000package version const Version = "0.14.6"