rr-5.7.0/.android/Dockerfile:

FROM ubuntu:latest

ARG ndk_version=r25c

RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    cmake \
    ninja-build \
    python3 \
    unzip

# Fetch and unpack the Android NDK.
RUN curl -o ndk.zip https://dl.google.com/android/repository/android-ndk-${ndk_version}-linux.zip
RUN unzip ndk.zip && mv android-ndk-${ndk_version} /ndk

# Fetch Cap'n Proto and build it for the host. Each RUN starts a fresh
# shell, so the cd, configure and make steps must be chained together.
RUN curl -o capnproto.tar.gz https://capnproto.org/capnproto-c++-0.10.2.tar.gz
RUN mkdir -p /src/capnproto
RUN tar zxf capnproto.tar.gz -C /src/capnproto --strip-components=1
RUN mkdir -p /build/capnproto && \
    cd /build/capnproto && \
    /src/capnproto/configure && \
    make -j$(nproc) install

RUN mkdir -p /build/rr
RUN chmod 777 /build/rr
WORKDIR /build/rr
CMD ["/bin/bash", "/src/rr/.android/build.sh"]

rr-5.7.0/.android/README.md:

# Building for Android

To build for Android (from the root of the rr source tree):

```
docker build .android -t rr-android
mkdir -p obj/dist
docker run -it --rm \
  -u $UID:$GID \
  -v $(pwd):/src/rr \
  -v $(pwd)/obj/dist:/dist \
  rr-android
```

`-u $UID:$GID` runs the build as your current UID/GID, which is necessary
to avoid the output being writable only by root. `-v $(pwd):/src/rr` mounts
the source tree into the container so it can be built.
`-v $(pwd)/obj/dist:/dist` maps the container's output directory `/dist` to
`obj/dist` on the host (the directory on the left of the `:`); the last
step of the build copies the rr tarball there.

rr-5.7.0/.android/build.sh:

#!/usr/bin/env bash
set -e
set -x

# CMake definitions shared by everything cross-compiled for the device.
DEVICE_CMAKE_DEFS="-DCMAKE_TOOLCHAIN_FILE=/ndk/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DANDROID_PLATFORM=android-28"

# Build capnp again, this time for the device.
INSTALL_PREFIX=$(pwd)/install
mkdir -p "$INSTALL_PREFIX"
mkdir capnproto-android
cd capnproto-android
cmake -G Ninja \
  $DEVICE_CMAKE_DEFS \
  -DEXTERNAL_CAPNP=True \
  -DBUILD_SHARED_LIBS=True \
  -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX" \
  /src/capnproto
cmake --build .
cmake --install .
cd -

# Build rr itself against the device capnp, then package it.
mkdir obj
cd obj
cmake -G Ninja \
  $DEVICE_CMAKE_DEFS \
  -Ddisable32bit=True \
  -DBUILD_TESTS=False \
  -DCMAKE_FIND_ROOT_PATH="$INSTALL_PREFIX" \
  -DSKIP_PKGCONFIG=True \
  -DEXTRA_VERSION_STRING="$BUILD_ID" \
  -DZLIB_LDFLAGS=-lz \
  /src/rr
cmake --build .
cpack -G TGZ
cp dist/* /dist/

rr-5.7.0/.android/cloudbuild.yaml:

steps:
- name: "gcr.io/rr-android-testing/android:31-ndk-r23b"
  script: ".android/build.sh"
  timeout: 3600s
timeout: 3600s
artifacts:
  objects:
    # Configure _ARTIFACT_BUCKET in your Cloud Build trigger:
    # https://cloud.google.com/build/docs/automating-builds/github/build-repos-from-github#creating_a_github_trigger.
    location: 'gs://$_ARTIFACT_BUCKET/$PROJECT_ID/$BUILD_ID'
    paths: ["obj/dist/rr-*-Android-x86_64.tar.gz"]
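The `_ARTIFACT_BUCKET` substitution is normally supplied by a Cloud Build
trigger, but the same config can also be exercised by hand. A minimal
sketch, assuming an authenticated `gcloud` CLI; the bucket name
`my-rr-artifacts` is a placeholder, not anything defined in this repo:

```
# Submit the Android build from the rr source root, passing the
# artifact bucket as a user-defined substitution.
gcloud builds submit \
  --config=.android/cloudbuild.yaml \
  --substitutions=_ARTIFACT_BUCKET=my-rr-artifacts
```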
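After the Docker-based build from the README finishes, the tarball lands
in `obj/dist` on the host. A quick sanity check, assuming the build
succeeded and produced an artifact matching the `paths` pattern above:

```
# Confirm the tarball exists and peek at its contents.
ls obj/dist/rr-*-Android-x86_64.tar.gz
tar tzf obj/dist/rr-*-Android-x86_64.tar.gz | head
```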
rr-5.7.0/.buildkite/.gitignore:

test.yml
test-*.yml

rr-5.7.0/.buildkite/CTestCostData.txt:

check_environment 2 0.0177417
64bit_child 2 0.388554
64bit_child-no-syscallbuf 2 0.343397
_llseek 2 0.178732
_llseek-no-syscallbuf 2 0.176157
abort 2 0.405355
abort-no-syscallbuf 2 0.401123
[Remaining entries omitted: several hundred further tests in the same
"<test name> <runs> <cost in seconds>" format, each also measured as a
-no-syscallbuf variant and, for most, as -32 and -32-no-syscallbuf
variants; machine-generated CTest timing data, truncated mid-file in the
source archive.]
ptrace_kill_grandtracee-32-no-syscallbuf 2 0.185387 x86/ptrace_tls-32 2 0.264243 x86/ptrace_tls-32-no-syscallbuf 2 0.186428 ptrace_seize-32 2 0.183568 ptrace_seize-32-no-syscallbuf 2 0.159259 ptrace_sigchld_blocked-32 2 0.200425 ptrace_sigchld_blocked-32-no-syscallbuf 2 0.200648 ptrace_signals-32 2 0.304559 ptrace_signals-32-no-syscallbuf 2 0.278018 ptrace_singlestep-32 2 0.639743 ptrace_singlestep-32-no-syscallbuf 2 0.222714 ptrace_syscall-32 2 0.189698 ptrace_syscall-32-no-syscallbuf 2 0.198213 ptrace_syscall_clone_untraced-32 2 0.303777 ptrace_syscall_clone_untraced-32-no-syscallbuf 2 0.238925 x86/ptrace_sysemu-32 2 0.258322 x86/ptrace_sysemu-32-no-syscallbuf 2 0.320688 ptrace_sysemu_syscall-32 2 0.185471 ptrace_sysemu_syscall-32-no-syscallbuf 2 0.220217 ptrace_trace_clone-32 2 0.212671 ptrace_trace_clone-32-no-syscallbuf 2 0.203035 ptrace_trace_exit-32 2 0.183704 ptrace_trace_exit-32-no-syscallbuf 2 0.167785 ptrace_traceme-32 2 0.184022 ptrace_traceme-32-no-syscallbuf 2 0.18187 ptracer_death-32 2 0.248333 ptracer_death-32-no-syscallbuf 2 0.249883 ptracer_death_multithread-32 2 0.388901 ptracer_death_multithread-32-no-syscallbuf 2 0.369612 ptracer_death_multithread_peer-32 2 0.419266 ptracer_death_multithread_peer-32-no-syscallbuf 2 0.395806 quotactl-32 2 0.182772 quotactl-32-no-syscallbuf 2 0.201297 x86/rdtsc-32 2 0.255399 x86/rdtsc-32-no-syscallbuf 2 0.229242 read_nothing-32 2 7.46141 read_nothing-32-no-syscallbuf 2 7.72549 readdir-32 2 0.174061 readdir-32-no-syscallbuf 2 0.167578 read_large-32 2 0.428546 read_large-32-no-syscallbuf 2 1.15199 read_oversize-32 2 0.162112 read_oversize-32-no-syscallbuf 2 0.162055 readlink-32 2 0.186308 readlink-32-no-syscallbuf 2 0.212726 readlinkat-32 2 0.165631 readlinkat-32-no-syscallbuf 2 0.160991 readv-32 2 0.190512 readv-32-no-syscallbuf 2 0.175585 record_replay_subject-32 2 3.25793 record_replay_subject-32-no-syscallbuf 2 3.43256 recvfrom-32 2 0.186827 recvfrom-32-no-syscallbuf 2 0.168322 redzone_integrity-32 2 1.14239 redzone_integrity-32-no-syscallbuf 2 1.13652 rename-32 2 0.178096 rename-32-no-syscallbuf 2 0.168682 rlimit-32 2 0.182187 rlimit-32-no-syscallbuf 2 0.165564 robust_futex-32 2 0.202095 robust_futex-32-no-syscallbuf 2 0.198996 rusage-32 2 0.168172 rusage-32-no-syscallbuf 2 0.206066 samask-32 2 0.337743 samask-32-no-syscallbuf 2 0.301596 save_data_fd-32 2 0.22658 save_data_fd-32-no-syscallbuf 2 0.16278 sched_attr-32 2 0.164539 sched_attr-32-no-syscallbuf 2 0.162087 sched_setaffinity-32 2 0.20929 sched_setaffinity-32-no-syscallbuf 2 0.173467 sched_setparam-32 2 0.173069 sched_setparam-32-no-syscallbuf 2 0.161447 sched_yield-32 2 0.293288 sched_yield-32-no-syscallbuf 2 0.246936 sched_yield_to_lower_priority-32 2 0.177359 sched_yield_to_lower_priority-32-no-syscallbuf 2 0.167451 scm_rights-32 2 0.700044 scm_rights-32-no-syscallbuf 2 0.681345 scratch_read-32 2 0.19306 scratch_read-32-no-syscallbuf 2 0.193434 seccomp-32 2 0.289931 seccomp-32-no-syscallbuf 2 0.188005 seccomp_cloning-32 2 0.174185 seccomp_cloning-32-no-syscallbuf 2 0.158513 seccomp_clone_fail-32 2 0.20893 seccomp_clone_fail-32-no-syscallbuf 2 0.162227 seccomp_desched-32 2 0.213358 seccomp_desched-32-no-syscallbuf 2 0.162138 seccomp_kill_exit-32 2 0.189344 seccomp_kill_exit-32-no-syscallbuf 2 0.155638 seccomp_null-32 2 0.16527 seccomp_null-32-no-syscallbuf 2 0.194281 seccomp_sigsys_args-32 2 0.200211 seccomp_sigsys_args-32-no-syscallbuf 2 0.161084 seccomp_sigsys_sigtrap-32 2 0.210447 seccomp_sigsys_sigtrap-32-no-syscallbuf 2 0.201276 seccomp_sigsys_syscallbuf-32 2 0.209561 
seccomp_sigsys_syscallbuf-32-no-syscallbuf 2 0.162703 seccomp_tsync-32 2 0.187879 seccomp_tsync-32-no-syscallbuf 2 0.176498 seccomp_veto_exec-32 2 0.173933 seccomp_veto_exec-32-no-syscallbuf 2 0.203032 self_shebang-32 2 0.24153 self_shebang-32-no-syscallbuf 2 0.231292 self_sigint-32 2 0.159231 self_sigint-32-no-syscallbuf 2 0.153099 sem-32 2 0.250009 sem-32-no-syscallbuf 2 0.256186 send_block-32 2 1.14479 send_block-32-no-syscallbuf 2 2.88824 sendfile-32 2 0.200584 sendfile-32-no-syscallbuf 2 0.165255 set_ptracer-32 2 0.189767 set_ptracer-32-no-syscallbuf 2 0.167316 set_tid_address-32 2 0.239881 set_tid_address-32-no-syscallbuf 2 0.196003 setgid-32 2 0.172545 setgid-32-no-syscallbuf 2 0.205069 setgroups-32 2 0.171244 setgroups-32-no-syscallbuf 2 0.167926 setsid-32 2 0.169398 setsid-32-no-syscallbuf 2 0.152853 setuid-32 2 0.233078 setuid-32-no-syscallbuf 2 0.239728 shared_exec-32 2 0.16772 shared_exec-32-no-syscallbuf 2 0.156276 shared_monitor-32 2 0.183911 shared_monitor-32-no-syscallbuf 2 0.170766 shared_offset-32 2 0.196638 shared_offset-32-no-syscallbuf 2 0.159853 shared_write-32 2 0.181719 shared_write-32-no-syscallbuf 2 0.164443 shm-32 2 0.217793 shm-32-no-syscallbuf 2 0.201548 shm_unmap-32 2 0.1996 shm_unmap-32-no-syscallbuf 2 0.165694 sigaction_old-32 2 0.163398 sigaction_old-32-no-syscallbuf 2 0.148241 sigaltstack-32 2 0.168017 sigaltstack-32-no-syscallbuf 2 0.177744 sigchld_interrupt_signal-32 2 4.17391 sigchld_interrupt_signal-32-no-syscallbuf 2 4.64724 sigcont-32 2 0.182719 sigcont-32-no-syscallbuf 2 0.17846 sighandler_bad_rsp_sigsegv-32 2 0.165357 sighandler_bad_rsp_sigsegv-32-no-syscallbuf 2 0.200135 sighandler_fork-32 2 0.229891 sighandler_fork-32-no-syscallbuf 2 0.201362 sighandler_mask-32 2 0.198156 sighandler_mask-32-no-syscallbuf 2 0.197925 sigill-32 2 0.175937 sigill-32-no-syscallbuf 2 0.151933 signal_deferred-32 2 0.238068 signal_deferred-32-no-syscallbuf 2 0.22673 signal_during_preload_init-32 2 0.205667 signal_during_preload_init-32-no-syscallbuf 2 0.16874 signal_frame-32 2 0.183135 signal_frame-32-no-syscallbuf 2 0.163214 signal_unstoppable-32 2 0.172697 signal_unstoppable-32-no-syscallbuf 2 0.176041 signalfd-32 2 0.163957 signalfd-32-no-syscallbuf 2 0.173083 sigprocmask-32 2 0.635516 sigprocmask-32-no-syscallbuf 2 0.512729 sigprocmask_ensure_delivery-32 2 0.188993 sigprocmask_ensure_delivery-32-no-syscallbuf 2 0.197066 sigprocmask_exec-32 2 0.224889 sigprocmask_exec-32-no-syscallbuf 2 0.203679 sigprocmask_evil-32 2 0.173367 sigprocmask_evil-32-no-syscallbuf 2 0.161906 sigprocmask_in_syscallbuf_sighandler-32 2 1.1356 sigprocmask_in_syscallbuf_sighandler-32-no-syscallbuf 2 1.16151 sigprocmask_rr_sigs-32 2 0.203465 sigprocmask_rr_sigs-32-no-syscallbuf 2 0.154496 sigprocmask_syscallbuf-32 2 0.257067 sigprocmask_syscallbuf-32-no-syscallbuf 2 0.163535 sigqueueinfo-32 2 0.192575 sigqueueinfo-32-no-syscallbuf 2 0.183718 x86/sigreturn-32 2 0.200426 x86/sigreturn-32-no-syscallbuf 2 0.203151 sigreturn_reg-32 2 0.164388 sigreturn_reg-32-no-syscallbuf 2 0.157327 sigreturnmask-32 2 0.360989 sigreturnmask-32-no-syscallbuf 2 0.329691 sigrt-32 2 0.279908 sigrt-32-no-syscallbuf 2 0.273772 sigstop-32 2 0.272004 sigstop-32-no-syscallbuf 2 0.21508 sigstop2-32 2 0.173375 sigstop2-32-no-syscallbuf 2 0.1829 sigsuspend-32 2 0.179968 sigsuspend-32-no-syscallbuf 2 0.181647 sigtrap-32 2 0.194202 sigtrap-32-no-syscallbuf 2 0.198646 simple_threads_stress-32 2 1.55506 simple_threads_stress-32-no-syscallbuf 2 1.91246 sioc-32 2 0.597241 sioc-32-no-syscallbuf 2 0.984665 small_holes-32 2 0.216759 
small_holes-32-no-syscallbuf 2 0.190171 sock_names_opts-32 2 0.24013 sock_names_opts-32-no-syscallbuf 2 0.208039 spinlock_priorities-32 2 1.18743 spinlock_priorities-32-no-syscallbuf 2 2.21217 splice-32 2 0.211997 splice-32-no-syscallbuf 2 0.175437 stack_growth_after_syscallbuf-32 2 0.176133 stack_growth_after_syscallbuf-32-no-syscallbuf 2 0.180134 stack_growth_syscallbuf-32 2 0.224124 stack_growth_syscallbuf-32-no-syscallbuf 2 7.43736 stack_growth_with_guard-32 2 0.176287 stack_growth_with_guard-32-no-syscallbuf 2 0.171927 stack_invalid-32 2 0.198923 stack_invalid-32-no-syscallbuf 2 0.158112 stack_overflow-32 2 0.299813 stack_overflow-32-no-syscallbuf 2 0.306432 stack_overflow_altstack-32 2 0.172148 stack_overflow_altstack-32-no-syscallbuf 2 0.180087 stack_overflow_with_guard-32 2 0.277676 stack_overflow_with_guard-32-no-syscallbuf 2 0.259982 statfs-32 2 0.247123 statfs-32-no-syscallbuf 2 0.223844 statx-32 2 0.18761 statx-32-no-syscallbuf 2 0.156038 stdout_child-32 2 0.228882 stdout_child-32-no-syscallbuf 2 0.202993 stdout_cloexec-32 2 0.37312 stdout_cloexec-32-no-syscallbuf 2 0.233512 stdout_dup-32 2 0.158335 stdout_dup-32-no-syscallbuf 2 0.180505 stdout_redirect-32 2 0.233056 stdout_redirect-32-no-syscallbuf 2 0.228063 switch_read-32 2 0.679615 switch_read-32-no-syscallbuf 2 0.705301 symlink-32 2 0.153647 symlink-32-no-syscallbuf 2 0.151247 sync-32 2 0.224612 sync-32-no-syscallbuf 2 0.373438 sync_file_range-32 2 0.176229 sync_file_range-32-no-syscallbuf 2 0.185675 syscall_bp-32 2 0.227873 syscall_bp-32-no-syscallbuf 2 0.152013 syscall_in_writable_mem-32 2 0.16217 syscall_in_writable_mem-32-no-syscallbuf 2 0.162692 syscallbuf_signal_reset-32 2 0.171769 syscallbuf_signal_reset-32-no-syscallbuf 2 0.172879 syscallbuf_signal_blocking-32 2 0.182044 syscallbuf_signal_blocking-32-no-syscallbuf 2 0.174233 syscallbuf_sigstop-32 2 8.3546 syscallbuf_sigstop-32-no-syscallbuf 2 19.726 syscallbuf_timeslice-32 2 0.195219 syscallbuf_timeslice-32-no-syscallbuf 2 3.18591 syscallbuf_timeslice2-32 2 0.235487 syscallbuf_timeslice2-32-no-syscallbuf 2 8.84544 sysconf-32 2 0.216995 sysconf-32-no-syscallbuf 2 0.183905 sysctl-32 2 0.17549 sysctl-32-no-syscallbuf 2 0.182945 sysemu_singlestep-32 2 0.188511 sysemu_singlestep-32-no-syscallbuf 2 0.160338 x86/sysfs-32 2 0.216447 x86/sysfs-32-no-syscallbuf 2 0.258669 sysinfo-32 2 0.169013 sysinfo-32-no-syscallbuf 2 0.159985 tgkill-32 2 0.211264 tgkill-32-no-syscallbuf 2 0.224148 thread_yield-32 2 3.26443 thread_yield-32-no-syscallbuf 2 3.18955 timer-32 2 65.4883 timer-32-no-syscallbuf 2 61.5215 timerfd-32 2 0.259309 timerfd-32-no-syscallbuf 2 0.25765 times-32 2 0.190807 times-32-no-syscallbuf 2 0.165569 truncate_temp-32 2 0.192476 truncate_temp-32-no-syscallbuf 2 0.183864 tun-32 2 0.266826 tun-32-no-syscallbuf 2 0.224363 two_signals_with_mask-32 2 0.193805 two_signals_with_mask-32-no-syscallbuf 2 0.200266 ulimit_low-32 2 0.202149 ulimit_low-32-no-syscallbuf 2 0.185735 uname-32 2 0.164955 uname-32-no-syscallbuf 2 0.178798 unexpected_exit-32 2 0.230114 unexpected_exit-32-no-syscallbuf 2 0.474315 unexpected_exit_execve-32 2 0.310605 unexpected_exit_execve-32-no-syscallbuf 2 0.483972 unexpected_exit_execve_twice-32 2 1.27987 unexpected_exit_execve_twice-32-no-syscallbuf 2 1.60515 unexpected_exit_pid_ns-32 2 0.676718 unexpected_exit_pid_ns-32-no-syscallbuf 2 0.21609 unjoined_thread-32 2 0.171839 unjoined_thread-32-no-syscallbuf 2 0.16768 unshare-32 2 0.402218 unshare-32-no-syscallbuf 2 1.21276 userfaultfd-32 2 0.189497 userfaultfd-32-no-syscallbuf 2 0.161169 utimes-32 2 
0.182642 utimes-32-no-syscallbuf 2 0.170065 vdso_parts-32 2 0.156907 vdso_parts-32-no-syscallbuf 2 0.160536 vfork_flush-32 2 0.240043 vfork_flush-32-no-syscallbuf 2 0.182477 vfork_shared-32 2 0.164851 vfork_shared-32-no-syscallbuf 2 0.157664 video_capture-32 2 0.1824 video_capture-32-no-syscallbuf 2 0.151597 vm_readv_writev-32 2 0.178979 vm_readv_writev-32-no-syscallbuf 2 0.187688 vsyscall-32 2 0.210085 vsyscall-32-no-syscallbuf 2 0.158692 vsyscall_timeslice-32 2 0.188542 vsyscall_timeslice-32-no-syscallbuf 2 0.176226 x86/x87env-32 2 0.178634 x86/x87env-32-no-syscallbuf 2 0.156739 wait-32 2 0.207803 wait-32-no-syscallbuf 2 0.181911 wait_sigstop-32 2 0.245489 wait_sigstop-32-no-syscallbuf 2 0.20551 write_race-32 2 2.81023 write_race-32-no-syscallbuf 2 1.62455 writev-32 2 0.216639 writev-32-no-syscallbuf 2 0.158551 xattr-32 2 0.175634 xattr-32-no-syscallbuf 2 0.185673 zero_length_read-32 2 0.20377 zero_length_read-32-no-syscallbuf 2 0.175449 std_random-32 2 0.215086 std_random-32-no-syscallbuf 2 0.213033 unwind_rr_page-32 2 1.15772 unwind_rr_page-32-no-syscallbuf 2 1.19208 abort_nonmain-32 2 0.419818 abort_nonmain-32-no-syscallbuf 2 0.514262 alternate_thread_diversion-32 2 0.765083 alternate_thread_diversion-32-no-syscallbuf 2 0.754914 args-32 2 0.160012 args-32-no-syscallbuf 2 0.164688 async_kill_with_syscallbuf-32 2 2.96233 async_kill_with_syscallbuf-32-no-syscallbuf 2 3.71867 async_kill_with_syscallbuf2-32 2 2.31923 async_kill_with_syscallbuf2-32-no-syscallbuf 2 2.7653 async_kill_with_threads-32 2 2.28526 async_kill_with_threads-32-no-syscallbuf 2 2.25265 async_kill_with_threads_main_running-32 2 2.32524 async_kill_with_threads_main_running-32-no-syscallbuf 2 2.35965 async_kill_with_threads_thread_running-32 2 6.16048 async_kill_with_threads_thread_running-32-no-syscallbuf 2 6.17832 async_segv-32 2 4.58465 async_segv-32-no-syscallbuf 2 4.26574 async_signal_syscalls-32 2 1.24413 async_signal_syscalls-32-no-syscallbuf 2 0.0191033 async_signal_syscalls2-32 2 0.488521 async_signal_syscalls2-32-no-syscallbuf 2 0.0195984 async_signal_syscalls_siginfo-32 2 2.3458 async_signal_syscalls_siginfo-32-no-syscallbuf 2 0.0198688 async_usr1-32 2 4.18131 async_usr1-32-no-syscallbuf 2 4.37104 blacklist-32 2 0.199968 blacklist-32-no-syscallbuf 2 0.169581 block_clone_checkpoint-32 2 0.972619 block_clone_checkpoint-32-no-syscallbuf 2 1.0248 block_clone_interrupted-32 2 6.1161 block_clone_interrupted-32-no-syscallbuf 2 5.00848 block_clone_syscallbuf_overflow-32 2 7.25687 block_clone_syscallbuf_overflow-32-no-syscallbuf 2 7.23739 block_intr_sigchld-32 2 6.66237 block_intr_sigchld-32-no-syscallbuf 2 5.92013 blocked_bad_ip-32 2 1.24533 blocked_bad_ip-32-no-syscallbuf 2 1.06315 blocked_sigill-32 2 0.449607 blocked_sigill-32-no-syscallbuf 2 0.436688 x86/blocked_sigsegv-32 2 0.427648 x86/blocked_sigsegv-32-no-syscallbuf 2 0.440355 breakpoint-32 2 0.781104 breakpoint-32-no-syscallbuf 2 0.756534 breakpoint_conditions-32 2 0.931492 breakpoint_conditions-32-no-syscallbuf 2 0.899478 breakpoint_overlap-32 2 0.907463 breakpoint_overlap-32-no-syscallbuf 2 0.796609 call_function-32 2 0.895301 call_function-32-no-syscallbuf 2 0.835369 call_gettid-32 2 0.809616 call_gettid-32-no-syscallbuf 2 0.797137 checkpoint_dying_threads-32 2 0.870958 checkpoint_dying_threads-32-no-syscallbuf 2 0.886313 checkpoint_mixed_mode-32 2 0.781085 checkpoint_mixed_mode-32-no-syscallbuf 2 0.810389 checksum_sanity-32 2 2.25127 checksum_sanity-32-no-syscallbuf 2 1.54868 check_lost_interrupts-32 2 1.64176 check_lost_interrupts-32-no-syscallbuf 2 
0.0340491 clone_interruption-32 2 1.89058 clone_interruption-32-no-syscallbuf 2 1.7642 clone_vfork-32 2 0.228699 clone_vfork-32-no-syscallbuf 2 0.284549 conditional_breakpoint_calls-32 2 0.8973 conditional_breakpoint_calls-32-no-syscallbuf 2 0.853431 conditional_breakpoint_offload-32 2 97.907 conditional_breakpoint_offload-32-no-syscallbuf 2 98.3436 condvar_stress-32 2 20.5983 condvar_stress-32-no-syscallbuf 2 55.8068 cont_race-32 2 18.4938 cont_race-32-no-syscallbuf 2 7.5187 x86/cpuid_singlestep-32 2 0.788428 x86/cpuid_singlestep-32-no-syscallbuf 2 0.775394 crash-32 2 0.375658 crash-32-no-syscallbuf 2 0.373543 crash_in_function-32 2 1.19966 crash_in_function-32-no-syscallbuf 2 1.1729 daemon_read-32 2 0.239872 daemon_read-32-no-syscallbuf 2 0.206172 dconf_mock-32 2 0.190717 dconf_mock-32-no-syscallbuf 2 0.210945 dev_tty-32 2 0.15771 dev_tty-32-no-syscallbuf 2 0.152782 diversion_sigtrap-32 2 1.14728 diversion_sigtrap-32-no-syscallbuf 2 1.14366 diversion_syscall-32 2 0.958771 diversion_syscall-32-no-syscallbuf 2 0.973065 dlopen-32 2 5.03807 dlopen-32-no-syscallbuf 2 6.69 early_error-32 2 0.407332 early_error-32-no-syscallbuf 2 0.493056 elapsed_time-32 2 1.80462 elapsed_time-32-no-syscallbuf 2 1.81788 exclusion_region-32 2 0.18669 exclusion_region-32-no-syscallbuf 2 0.165941 exec_failed-32 2 0.730666 exec_failed-32-no-syscallbuf 2 0.759682 exec_many-32 2 6.45334 exec_many-32-no-syscallbuf 2 4.53874 execve_loop-32 2 6.09311 execve_loop-32-no-syscallbuf 2 4.70132 exit_codes-32 2 0.445052 exit_codes-32-no-syscallbuf 2 0.477203 exit_group-32 2 0.181852 exit_group-32-no-syscallbuf 2 0.176978 exit_race-32 2 1.38796 exit_race-32-no-syscallbuf 2 1.21512 exit_status-32 2 0.116335 exit_status-32-no-syscallbuf 2 0.114189 x86/explicit_checkpoints-32 2 1.30454 x86/explicit_checkpoints-32-no-syscallbuf 2 1.26135 fd_limit-32 2 0.187127 fd_limit-32-no-syscallbuf 2 0.997723 fork_stress-32 2 3.61525 fork_stress-32-no-syscallbuf 2 1.40335 fork_syscalls-32 2 0.168964 fork_syscalls-32-no-syscallbuf 2 0.208122 function_calls-32 2 3.03073 function_calls-32-no-syscallbuf 2 3.51453 x86/fxregs-32 2 0.765217 x86/fxregs-32-no-syscallbuf 2 0.772687 getcwd-32 2 0.169073 getcwd-32-no-syscallbuf 2 0.169593 gdb_bogus_breakpoint-32 2 0.772146 gdb_bogus_breakpoint-32-no-syscallbuf 2 0.760645 goto_event-32 2 3.68828 goto_event-32-no-syscallbuf 2 1.69596 hello-32 2 0.202056 hello-32-no-syscallbuf 2 0.182372 hooks-32 2 1.00194 hooks-32-no-syscallbuf 2 0.0212301 ignored_async_usr1-32 2 2.39089 ignored_async_usr1-32-no-syscallbuf 2 3.59592 ignored_sigsegv-32 2 0.467501 ignored_sigsegv-32-no-syscallbuf 2 0.448767 ignore_nested-32 2 0.328183 ignore_nested-32-no-syscallbuf 2 0.321336 immediate_restart-32 2 0.901743 immediate_restart-32-no-syscallbuf 2 0.944109 x86/int3_ok-32 2 0.170286 x86/int3_ok-32-no-syscallbuf 2 0.175991 interrupt-32 2 4.98112 interrupt-32-no-syscallbuf 2 4.48325 intr_ptrace_decline-32 2 5.18875 intr_ptrace_decline-32-no-syscallbuf 2 4.69277 invalid_interpreter-32 2 1.16413 invalid_interpreter-32-no-syscallbuf 2 1.12651 invalid_jump-32 2 3.69128 invalid_jump-32-no-syscallbuf 2 1.89185 jit_proc_mem-32 2 0.871428 jit_proc_mem-32-no-syscallbuf 2 0.869078 link-32 2 0.190435 link-32-no-syscallbuf 2 0.166527 madvise_dontfork-32 2 0.915018 madvise_dontfork-32-no-syscallbuf 2 0.916182 main_thread_exit-32 2 0.976596 main_thread_exit-32-no-syscallbuf 2 0.988971 many_yields-32 2 2.70468 many_yields-32-no-syscallbuf 2 2.14603 mmap_fd_reuse_checkpoint-32 2 0.941569 mmap_fd_reuse_checkpoint-32-no-syscallbuf 2 0.939913 
mmap_replace_most_mappings-32 2 2.68514 mmap_replace_most_mappings-32-no-syscallbuf 2 2.41771 mmap_shared_prot-32 2 0.929817 mmap_shared_prot-32-no-syscallbuf 2 0.92472 mmap_shared_write_exec_race-32 2 3.81915 mmap_shared_write_exec_race-32-no-syscallbuf 2 4.60813 mmap_tmpfs-32 2 0.194743 mmap_tmpfs-32-no-syscallbuf 2 0.18605 mmap_write-32 2 0.0192549 mmap_write-32-no-syscallbuf 2 0.0200511 mmap_write_private-32 2 0.308225 mmap_write_private-32-no-syscallbuf 2 0.324777 morestack_unwind-32 1 0.537327 morestack_unwind-32-no-syscallbuf 1 0.0202514 mprotect_growsdown-32 2 0.339205 mprotect_growsdown-32-no-syscallbuf 2 0.31326 mprotect_syscallbuf_overflow-32 2 0.280952 mprotect_syscallbuf_overflow-32-no-syscallbuf 2 8.061 mutex_pi_stress-32 2 22.3886 mutex_pi_stress-32-no-syscallbuf 2 27.1594 nested_detach_wait-32 2 0.562413 nested_detach_wait-32-no-syscallbuf 2 0.555944 overflow_branch_counter-32 2 2.97918 overflow_branch_counter-32-no-syscallbuf 2 3.1523 patch_page_end-32 2 0.168311 patch_page_end-32-no-syscallbuf 2 0.027147 x86/patch_40_80_f6_81-32 2 0.164649 x86/patch_40_80_f6_81-32-no-syscallbuf 2 0.0250984 perf_event_mmap-32 2 1.20662 perf_event_mmap-32-no-syscallbuf 2 1.20416 priority-32 2 0.188146 priority-32-no-syscallbuf 2 0.187545 ptrace_remote_unmap-32 2 4.96867 ptrace_remote_unmap-32-no-syscallbuf 2 4.48549 read_big_struct-32 2 0.895185 read_big_struct-32-no-syscallbuf 2 0.971367 remove_latest_trace-32 2 1.19413 remove_latest_trace-32-no-syscallbuf 2 1.18728 restart_abnormal_exit-32 2 1.15147 restart_abnormal_exit-32-no-syscallbuf 2 1.11481 reverse_continue_breakpoint-32 2 1.68965 reverse_continue_breakpoint-32-no-syscallbuf 2 1.79149 reverse_continue_multiprocess-32 2 10.6853 reverse_continue_multiprocess-32-no-syscallbuf 2 10.3271 reverse_continue_process_signal-32 2 3.62473 reverse_continue_process_signal-32-no-syscallbuf 2 5.01924 reverse_many_breakpoints-32 2 4.38134 reverse_many_breakpoints-32-no-syscallbuf 2 5.46287 reverse_step_long-32 2 6.24439 reverse_step_long-32-no-syscallbuf 2 9.08036 reverse_step_threads-32 2 2.50329 reverse_step_threads-32-no-syscallbuf 2 2.31751 reverse_step_threads_break-32 2 5.17554 reverse_step_threads_break-32-no-syscallbuf 2 5.72419 rr_ps-32 2 0.436267 rr_ps-32-no-syscallbuf 2 0.374363 rr_ps_ns-32 2 0.234007 rr_ps_ns-32-no-syscallbuf 2 0.240328 rseq-32 2 67.2539 rseq-32-no-syscallbuf 2 83.9439 search-32 2 0.864452 search-32-no-syscallbuf 2 0.817897 seccomp_blocks_rr-32 2 0.445652 seccomp_blocks_rr-32-no-syscallbuf 2 0.191702 seccomp_signals-32 2 2.71263 seccomp_signals-32-no-syscallbuf 2 3.18287 segfault-32 2 0.187947 segfault-32-no-syscallbuf 2 0.167304 shared_map-32 2 0.231787 shared_map-32-no-syscallbuf 2 0.217581 shared_persistent_file-32 2 0.989357 shared_persistent_file-32-no-syscallbuf 2 0.914795 signal_numbers-32 2 1.06353 signal_numbers-32-no-syscallbuf 2 1.02158 sigprocmask_race-32 2 0.183223 sigprocmask_race-32-no-syscallbuf 2 0.0195983 sigprocmask_rr_sigs_nondefault-32 2 0.0950051 sigprocmask_rr_sigs_nondefault-32-no-syscallbuf 2 0.0818044 simple-32 2 0.324804 simple-32-no-syscallbuf 2 0.29707 x86/singlestep_pushf-32 2 0.870134 x86/singlestep_pushf-32-no-syscallbuf 2 0.75588 stack_growth-32 2 0.835045 stack_growth-32-no-syscallbuf 2 0.865223 step_thread-32 2 1.00272 step_thread-32-no-syscallbuf 2 1.03955 strict_priorities-32 2 10.905 strict_priorities-32-no-syscallbuf 2 12.5371 x86/string_instructions-32 2 0.732684 x86/string_instructions-32-no-syscallbuf 2 0.690063 x86/string_instructions_async_signals-32 2 0.243053 
x86/string_instructions_async_signals-32-no-syscallbuf 2 70.768027 x86/string_instructions_async_signals_shared-32 2 0.280141 x86/string_instructions_async_signals_shared-32-no-syscallbuf 2 91.424003 x86/string_instructions_multiwatch-32 2 0.885137 x86/string_instructions_multiwatch-32-no-syscallbuf 2 0.815804 x86/string_instructions_replay-32 2 13.1396 x86/string_instructions_replay-32-no-syscallbuf 2 13.083 x86/string_instructions_singlestep_fastforward-32 2 7.99402 x86/string_instructions_singlestep_fastforward-32-no-syscallbuf 2 8.02594 x86/string_instructions_watch-32 2 0.944771 x86/string_instructions_watch-32-no-syscallbuf 2 0.935292 syscallbuf_fd_disabling-32 2 0.305953 syscallbuf_fd_disabling-32-no-syscallbuf 2 0.282688 syscallbuf_signal_blocking_read-32 2 1.23466 syscallbuf_signal_blocking_read-32-no-syscallbuf 2 1.14413 sysconf_onln-32 2 0.344965 sysconf_onln-32-no-syscallbuf 2 0.327284 target_fork-32 2 2.71431 target_fork-32-no-syscallbuf 2 1.43742 target_process-32 2 1.32236 target_process-32-no-syscallbuf 2 1.21721 tcp_sockets-32 2 0.14743 tcp_sockets-32-no-syscallbuf 2 0.151912 term_nonmain-32 2 0.21022 term_nonmain-32-no-syscallbuf 2 0.208668 term_rr-32 2 3.27718 term_rr-32-no-syscallbuf 2 3.26641 term_trace_reset-32 2 0.214047 term_trace_reset-32-no-syscallbuf 2 0.213101 term_trace_syscall-32 2 0.228815 term_trace_syscall-32-no-syscallbuf 2 0.24171 thread_exit_signal-32 2 3.09934 thread_exit_signal-32-no-syscallbuf 2 2.99841 thread_open_race-32 2 33.8294 thread_open_race-32-no-syscallbuf 2 0.0212188 thread_stress-32 2 13.7434 thread_stress-32-no-syscallbuf 2 16.5669 threaded_syscall_spam-32 2 1.71365 threaded_syscall_spam-32-no-syscallbuf 2 0.0223637 threads-32 2 1.20922 threads-32-no-syscallbuf 2 1.14972 tls-32 2 0.891054 tls-32-no-syscallbuf 2 0.863566 ttyname-32 2 0.155298 ttyname-32-no-syscallbuf 2 0.185634 unexpected_stack_growth-32 2 0.960644 unexpected_stack_growth-32-no-syscallbuf 2 0.938926 user_ignore_sig-32 2 0.169817 user_ignore_sig-32-no-syscallbuf 2 0.166966 vdso_clock_gettime_stack-32 2 0.0198637 vdso_clock_gettime_stack-32-no-syscallbuf 2 0.0198518 vdso_gettimeofday_stack-32 2 0.0196665 vdso_gettimeofday_stack-32-no-syscallbuf 2 0.0200317 vdso_time_stack-32 2 0.0221022 vdso_time_stack-32-no-syscallbuf 2 0.0202745 vfork-32 2 0.236849 vfork-32-no-syscallbuf 2 0.305823 vfork_read_clone_stress-32 2 1.58314 vfork_read_clone_stress-32-no-syscallbuf 2 2.62239 vsyscall_reverse_next-32 2 211.0415 vsyscall_reverse_next-32-no-syscallbuf 2 205.237 wait_for_all-32 2 1.14266 wait_for_all-32-no-syscallbuf 2 1.14552 watchpoint-32 2 1.22955 watchpoint-32-no-syscallbuf 2 1.35199 watchpoint_at_sched-32 2 1.13428 watchpoint_at_sched-32-no-syscallbuf 2 1.25188 watchpoint_before_signal-32 2 0.910176 watchpoint_before_signal-32-no-syscallbuf 2 0.854977 watchpoint_no_progress-32 2 3.67209 watchpoint_no_progress-32-no-syscallbuf 2 1.86114 watchpoint_size_change-32 2 0.887745 watchpoint_size_change-32-no-syscallbuf 2 0.902858 watchpoint_syscall-32 2 0.901712 watchpoint_syscall-32-no-syscallbuf 2 0.813837 watchpoint_unaligned-32 2 0.781136 watchpoint_unaligned-32-no-syscallbuf 2 0.800987 async_signal_syscalls_100-32 2 0.197139 async_signal_syscalls_100-32-no-syscallbuf 2 4.47429 async_signal_syscalls_1000-32 2 0.18801 async_signal_syscalls_1000-32-no-syscallbuf 2 3.44946 bad_breakpoint-32 2 7.34282 bad_breakpoint-32-no-syscallbuf 2 8.09281 break_block-32 2 9.83223 break_block-32-no-syscallbuf 2 9.83806 break_clock-32 2 0.946965 break_clock-32-no-syscallbuf 2 1.12394 break_clone-32 2 
0.751169 break_clone-32-no-syscallbuf 2 0.820749 break_exec-32 2 0.84265 break_exec-32-no-syscallbuf 2 0.874733 break_int3-32 2 0.780189 break_int3-32-no-syscallbuf 2 0.759644 break_mmap_private-32 2 1.98541 break_mmap_private-32-no-syscallbuf 2 1.89648 break_msg-32 2 2.89203 break_msg-32-no-syscallbuf 2 3.05882 x86/break_rdtsc-32 2 0.848203 x86/break_rdtsc-32-no-syscallbuf 2 0.841489 break_sigreturn-32 2 4.78895 break_sigreturn-32-no-syscallbuf 2 4.88569 break_sync_signal-32 2 0.753919 break_sync_signal-32-no-syscallbuf 2 0.750371 break_thread-32 2 0.912845 break_thread-32-no-syscallbuf 2 1.16875 break_time_slice-32 2 9.33131 break_time_slice-32-no-syscallbuf 2 9.28867 breakpoint_consistent-32 2 0.792691 breakpoint_consistent-32-no-syscallbuf 2 0.775111 call_exit-32 2 0.981659 call_exit-32-no-syscallbuf 2 0.993623 check_patched_pthread-32 2 1.86368 check_patched_pthread-32-no-syscallbuf 2 1.84408 checkpoint_async_signal_syscalls_1000-32 2 2.25777 checkpoint_async_signal_syscalls_1000-32-no-syscallbuf 2 0.0206373 checkpoint_mmap_shared-32 2 19.0184 checkpoint_mmap_shared-32-no-syscallbuf 2 13.8387 checkpoint_prctl_name-32 2 24.3736 checkpoint_prctl_name-32-no-syscallbuf 2 23.9934 checkpoint_simple-32 2 10.1515 checkpoint_simple-32-no-syscallbuf 2 7.59328 checksum_sanity_noclone-32 2 1.41863 checksum_sanity_noclone-32-no-syscallbuf 2 1.55539 comm-32 2 0.617483 comm-32-no-syscallbuf 2 0.60854 cont_signal-32 2 4.82038 cont_signal-32-no-syscallbuf 2 4.83081 x86/cpuid-32 2 1.34973 x86/cpuid-32-no-syscallbuf 2 0.999242 dead_thread_target-32 2 0.989117 dead_thread_target-32-no-syscallbuf 2 0.952645 desched_ticks-32 2 0.872766 desched_ticks-32-no-syscallbuf 2 0.0242881 deliver_async_signal_during_syscalls-32 2 0.708833 deliver_async_signal_during_syscalls-32-no-syscallbuf 2 0.0201246 env_newline-32 2 0.168631 env_newline-32-no-syscallbuf 2 0.156516 exec_deleted-32 2 0.170722 exec_deleted-32-no-syscallbuf 2 0.168588 exec_stop-32 2 0.897157 exec_stop-32-no-syscallbuf 2 0.909516 execp-32 2 0.180209 execp-32-no-syscallbuf 2 0.161059 explicit_checkpoint_clone-32 2 0.995701 explicit_checkpoint_clone-32-no-syscallbuf 2 0.952514 file_name_newline-32 2 0.187082 file_name_newline-32-no-syscallbuf 2 0.159259 final_sigkill-32 2 0.985235 final_sigkill-32-no-syscallbuf 2 0.96928 first_instruction-32 2 0.741433 first_instruction-32-no-syscallbuf 2 0.718259 fork_exec_info_thr-32 2 3.71603 fork_exec_info_thr-32-no-syscallbuf 2 3.23103 get_thread_list-32 2 0.995385 get_thread_list-32-no-syscallbuf 2 0.946593 hardlink_mmapped_files-32 2 2.21809 hardlink_mmapped_files-32-no-syscallbuf 2 2.19885 hbreak-32 2 0.87166 hbreak-32-no-syscallbuf 2 0.89237 mprotect_step-32 2 1.05356 mprotect_step-32-no-syscallbuf 2 1.12296 nested_detach-32 2 0.48179 nested_detach-32-no-syscallbuf 2 0.407968 nested_detach_kill-32 2 0.620392 nested_detach_kill-32-no-syscallbuf 2 0.522451 nested_release-32 2 0.310225 nested_release-32-no-syscallbuf 2 0.303973 parent_no_break_child_bkpt-32 2 1.07206 parent_no_break_child_bkpt-32-no-syscallbuf 2 1.04337 parent_no_stop_child_crash-32 2 1.06683 parent_no_stop_child_crash-32-no-syscallbuf 2 0.994023 post_exec_fpu_regs-32 2 0.58547 post_exec_fpu_regs-32-no-syscallbuf 2 0.584769 proc_maps-32 2 0.743487 proc_maps-32-no-syscallbuf 2 0.81206 read_bad_mem-32 2 0.741059 read_bad_mem-32-no-syscallbuf 2 0.746854 record_replay-32 2 142.449 record_replay-32-no-syscallbuf 2 142.207 remove_watchpoint-32 2 0.887807 remove_watchpoint-32-no-syscallbuf 2 0.848615 replay_overlarge_event_number-32 2 0.151729 
replay_overlarge_event_number-32-no-syscallbuf 2 0.145948 replay_serve_files-32 2 3.53657 replay_serve_files-32-no-syscallbuf 2 3.72855 restart_invalid_checkpoint-32 2 0.979497 restart_invalid_checkpoint-32-no-syscallbuf 2 0.936837 restart_unstable-32 2 0.976465 restart_unstable-32-no-syscallbuf 2 0.946823 restart_diversion-32 2 1.05779 restart_diversion-32-no-syscallbuf 2 1.06658 reverse_alarm-32 2 5.55884 reverse_alarm-32-no-syscallbuf 2 4.26309 reverse_continue_exec_subprocess-32 2 3.53948 reverse_continue_exec_subprocess-32-no-syscallbuf 2 3.5475 reverse_continue_fork_subprocess-32 2 1.11541 reverse_continue_fork_subprocess-32-no-syscallbuf 2 1.12745 reverse_continue_int3-32 2 4.08885 reverse_continue_int3-32-no-syscallbuf 2 3.79756 reverse_continue_start-32 2 1.74623 reverse_continue_start-32-no-syscallbuf 2 1.84734 reverse_finish-32 2 2.71542 reverse_finish-32-no-syscallbuf 2 2.34464 reverse_step_breakpoint-32 2 4.20755 reverse_step_breakpoint-32-no-syscallbuf 2 4.30403 reverse_step_signal-32 2 4.36456 reverse_step_signal-32-no-syscallbuf 2 4.44296 reverse_step_threads2-32 2 4.43858 reverse_step_threads2-32-no-syscallbuf 2 4.3075 reverse_watchpoint-32 2 3.28588 reverse_watchpoint-32-no-syscallbuf 2 3.61425 reverse_watchpoint_syscall-32 2 7.9328 reverse_watchpoint_syscall-32-no-syscallbuf 2 7.61239 run_end-32 2 1.0957 run_end-32-no-syscallbuf 2 1.08614 run_in_function-32 2 0.9406 run_in_function-32-no-syscallbuf 2 0.975155 sanity-32 2 0.730954 sanity-32-no-syscallbuf 2 0.718052 seekticks-32 2 1.89101 seekticks-32-no-syscallbuf 2 1.71722 shm_checkpoint-32 2 0.979113 shm_checkpoint-32-no-syscallbuf 2 0.95366 siginfo-32 2 0.730536 siginfo-32-no-syscallbuf 2 0.768976 x86/sigreturn_checksum-32 2 0.736871 x86/sigreturn_checksum-32-no-syscallbuf 2 0.69224 signal_stop-32 2 0.766017 signal_stop-32-no-syscallbuf 2 0.824693 signal_checkpoint-32 2 0.958273 signal_checkpoint-32-no-syscallbuf 2 1.01121 simple_script-32 2 0.177902 simple_script-32-no-syscallbuf 2 0.191716 simple_script_debug-32 2 0.888283 simple_script_debug-32-no-syscallbuf 2 0.894607 simple_winch-32 2 2.32163 simple_winch-32-no-syscallbuf 2 2.12602 stack_overflow_debug-32 2 6.3049 stack_overflow_debug-32-no-syscallbuf 2 6.60805 step1-32 2 0.841349 step1-32-no-syscallbuf 2 0.766025 x86/step_rdtsc-32 2 0.883087 x86/step_rdtsc-32-no-syscallbuf 2 0.927036 step_signal-32 2 0.879484 step_signal-32-no-syscallbuf 2 0.916336 x86/string_instructions_break-32 2 6.07348 x86/string_instructions_break-32-no-syscallbuf 2 7.3036 x86/string_instructions_replay_quirk-32 2 6.26888 x86/string_instructions_replay_quirk-32-no-syscallbuf 2 6.62562 subprocess_exit_ends_session-32 2 1.00101 subprocess_exit_ends_session-32-no-syscallbuf 2 0.979481 switch_processes-32 2 5.00449 switch_processes-32-no-syscallbuf 2 3.96395 syscallbuf_timeslice_250-32 2 0.2163 syscallbuf_timeslice_250-32-no-syscallbuf 2 3.28733 trace_version-32 2 2.55284 trace_version-32-no-syscallbuf 2 2.55163 term_trace_cpu-32 2 0.541342 term_trace_cpu-32-no-syscallbuf 2 0.621864 trace_events-32 2 0.131738 trace_events-32-no-syscallbuf 2 0.127949 tty-32 2 0.166929 tty-32-no-syscallbuf 2 0.147314 unmap_vdso-32 2 0.336841 unmap_vdso-32-no-syscallbuf 2 0.340823 unwind_on_signal-32 2 0.803562 unwind_on_signal-32-no-syscallbuf 2 0.792202 vfork_exec-32 2 0.942089 vfork_exec-32-no-syscallbuf 2 0.957233 vfork_break_parent-32 2 0.884067 vfork_break_parent-32-no-syscallbuf 2 0.873047 vsyscall_singlestep-32 2 0.0226703 vsyscall_singlestep-32-no-syscallbuf 2 0.0210348 watch_code-32 2 1.8636 
watch_code-32-no-syscallbuf 2 1.79997 watchpoint_cond-32 2 0.788546 watchpoint_cond-32-no-syscallbuf 2 0.766512 when-32 2 0.761362 when-32-no-syscallbuf 2 0.787807 close_range 1 0.187604 close_range-no-syscallbuf 1 0.169697 dlchecksum 1 0.240732 dlchecksum-no-syscallbuf 1 0.206452 epoll_many 1 0.216123 epoll_many-no-syscallbuf 1 0.240203 epoll_pwait2 1 1.30869 epoll_pwait2-no-syscallbuf 1 1.22883 futex_invalid_op 1 0.17379 futex_invalid_op-no-syscallbuf 1 0.213894 futex_restart_race 1 0.263257 futex_restart_race-no-syscallbuf 1 0.22909 futex_restart_clone 1 0.205975 futex_restart_clone-no-syscallbuf 1 0.196681 ioctl_br 1 0.40846 ioctl_br-no-syscallbuf 1 0.591008 mremap_dontunmap 1 0.222197 mremap_dontunmap-no-syscallbuf 1 0.192309 ppoll_deliver 1 0.165071 ppoll_deliver-no-syscallbuf 1 0.158939 x86/rdtsc_flags 1 0.161648 x86/rdtsc_flags-no-syscallbuf 1 0.149144 x86/signal_xmm_state 1 6.5201 x86/signal_xmm_state-no-syscallbuf 1 6.15106 sigpwr 1 0.145139 sigpwr-no-syscallbuf 1 0.161406 sock_name_null 1 0.173122 sock_name_null-no-syscallbuf 1 0.134844 sysconf_conf 1 0.155498 sysconf_conf-no-syscallbuf 1 0.13819 chaos_oom 1 0.719051 chaos_oom-no-syscallbuf 1 0.486065 x86/diversion_rdtsc 1 0.97806 x86/diversion_rdtsc-no-syscallbuf 1 1.08858 gdb_qpasssignals 1 110.1108 gdb_qpasssignals-no-syscallbuf 1 97.4456 madvise_fracture_flags 1 57.982 madvise_fracture_flags-no-syscallbuf 1 88.7955 x86/morestack_unwind 1 1.05934 x86/morestack_unwind-no-syscallbuf 1 0.033799 nested_detach_kill_stuck 1 5.52231 nested_detach_kill_stuck-no-syscallbuf 1 5.47973 x86/rdtsc_loop 1 7.49252 x86/rdtsc_loop-no-syscallbuf 1 0.0176659 x86/rdtsc_loop2 1 3.23895 x86/rdtsc_loop2-no-syscallbuf 1 0.0587636 rseq_syscallbuf 1 1.68397 rseq_syscallbuf-no-syscallbuf 1 0.0232907 seccomp_open 1 0.256711 seccomp_open-no-syscallbuf 1 0.224981 x86/syscallbuf_branch_check 1 0.263251 x86/syscallbuf_branch_check-no-syscallbuf 1 0.0358348 x86/syscallbuf_rdtsc_page 1 0.224774 x86/syscallbuf_rdtsc_page-no-syscallbuf 1 0.0200236 term_rr_ok 1 0.239889 term_rr_ok-no-syscallbuf 1 0.411474 copy_all 1 1.14145 copy_all-no-syscallbuf 1 0.99862 tick0 1 0.810798 tick0-no-syscallbuf 1 0.781358 tick0_less 1 0.774784 tick0_less-no-syscallbuf 1 0.708288 watchpoint_unaligned2 1 1.04218 watchpoint_unaligned2-no-syscallbuf 1 1.04466 close_range-32 1 0.145352 close_range-32-no-syscallbuf 1 0.189966 dlchecksum-32 1 0.25934 dlchecksum-32-no-syscallbuf 1 0.256462 epoll_many-32 1 0.152067 epoll_many-32-no-syscallbuf 1 0.201357 epoll_pwait2-32 1 1.2001 epoll_pwait2-32-no-syscallbuf 1 1.14867 futex_invalid_op-32 1 0.20886 futex_invalid_op-32-no-syscallbuf 1 0.211214 futex_restart_race-32 1 0.25145 futex_restart_race-32-no-syscallbuf 1 0.209322 futex_restart_clone-32 1 0.186733 futex_restart_clone-32-no-syscallbuf 1 0.187653 ioctl_br-32 1 0.192089 ioctl_br-32-no-syscallbuf 1 0.168439 mremap_dontunmap-32 1 0.204069 mremap_dontunmap-32-no-syscallbuf 1 0.121815 ppoll_deliver-32 1 0.149065 ppoll_deliver-32-no-syscallbuf 1 0.16973 x86/rdtsc_flags-32 1 0.158374 x86/rdtsc_flags-32-no-syscallbuf 1 0.161395 x86/signal_xmm_state-32 1 5.82273 x86/signal_xmm_state-32-no-syscallbuf 1 5.93857 sigpwr-32 1 0.135917 sigpwr-32-no-syscallbuf 1 0.12206 sock_name_null-32 1 0.216395 sock_name_null-32-no-syscallbuf 1 0.18803 sysconf_conf-32 1 0.236007 sysconf_conf-32-no-syscallbuf 1 0.170377 chaos_oom-32 1 0.258801 chaos_oom-32-no-syscallbuf 1 0.218391 x86/diversion_rdtsc-32 1 1.00397 x86/diversion_rdtsc-32-no-syscallbuf 1 1.03605 gdb_qpasssignals-32 1 75.7295 
gdb_qpasssignals-32-no-syscallbuf 1 95.712 madvise_fracture_flags-32 1 58.9242 madvise_fracture_flags-32-no-syscallbuf 1 86.3502 x86/morestack_unwind-32 1 1.2353 x86/morestack_unwind-32-no-syscallbuf 1 0.134301 nested_detach_kill_stuck-32 1 5.67408 nested_detach_kill_stuck-32-no-syscallbuf 1 5.53669 x86/rdtsc_loop-32 1 0.189036 x86/rdtsc_loop-32-no-syscallbuf 1 0.0190103 x86/rdtsc_loop2-32 1 0.119683 x86/rdtsc_loop2-32-no-syscallbuf 1 0.0781239 rseq_syscallbuf-32 1 1.66755 rseq_syscallbuf-32-no-syscallbuf 1 0.0220513 seccomp_open-32 1 0.240722 seccomp_open-32-no-syscallbuf 1 0.183111 x86/syscallbuf_branch_check-32 1 0.320975 x86/syscallbuf_branch_check-32-no-syscallbuf 1 0.0538966 x86/syscallbuf_rdtsc_page-32 1 0.187748 x86/syscallbuf_rdtsc_page-32-no-syscallbuf 1 0.0202657 term_rr_ok-32 1 0.251002 term_rr_ok-32-no-syscallbuf 1 0.222906 copy_all-32 1 1.00337 copy_all-32-no-syscallbuf 1 0.934465 tick0-32 1 0.61251 tick0-32-no-syscallbuf 1 0.634147 tick0_less-32 1 0.605018 tick0_less-32-no-syscallbuf 1 0.652338 watchpoint_unaligned2-32 1 0.860627 watchpoint_unaligned2-32-no-syscallbuf 1 0.854681 --- rr-5.7.0/.buildkite/Manifest.toml000066400000000000000000000072661450675474200166740ustar00rootroot00000000000000# This file is machine-generated - editing it directly is not advised julia_version = "1.7.2" manifest_format = "2.0" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[deps.JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.4.1" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" [[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[deps.Libiconv_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" version = "1.16.1+1" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[deps.Preferences]] deps = ["TOML"] git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.3.0" [[deps.Printf]] deps = ["Unicode"] 
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[deps.StringEncodings]] deps = ["Libiconv_jll"] git-tree-sha1 = "50ccd5ddb00d19392577902f0079267a72c5ab04" uuid = "69024149-9ee7-55f6-a4c4-859efe599b68" version = "0.3.5" [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[deps.YAML]] deps = ["Base64", "Dates", "Printf", "StringEncodings"] git-tree-sha1 = "3c6e8b9f5cdaaa21340f841653942e1a6b6561e5" uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" version = "0.4.7" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" rr-5.7.0/.buildkite/Project.toml000066400000000000000000000000651450675474200165220ustar00rootroot00000000000000[deps] YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" rr-5.7.0/.buildkite/capture_tmpdir.jl000066400000000000000000000111511450675474200175660ustar00rootroot00000000000000import Dates import Pkg import Tar function my_exit(process::Base.Process) wait(process) @info( "", process.exitcode, process.termsignal, ) # Pass the exit code back up if process.termsignal != 0 ccall(:raise, Cvoid, (Cint,), process.termsignal) # If for some reason the signal did not cause an exit, we'll exit manually. # We need to make sure that we exit with a non-zero exit code. 
if process.exitcode != 0 exit(process.exitcode) else exit(1) end end exit(process.exitcode) end function get_from_env(name::AbstractString) value = ENV[name] result = convert(String, strip(value))::String return result end cleanup_string(str::AbstractString) = replace(str, r"[^A-Za-z0-9_]" => "_") if Base.VERSION < v"1.6" throw(ErrorException("The `$(basename(@__FILE__))` script requires Julia 1.6 or greater")) end if length(ARGS) < 1 throw(ErrorException("Usage: julia $(basename(@__FILE__)) [command...]")) end const build_number = get_from_env("BUILDKITE_BUILD_NUMBER") |> cleanup_string const commit_full = get_from_env("BUILDKITE_COMMIT") |> cleanup_string const job_name = get_from_env("BUILDKITE_STEP_KEY") |> cleanup_string const buildkite_timeout_minutes_string = get_from_env("BUILDKITE_TIMEOUT") const commit_short = first(commit_full, 10) const buildkite_timeout_minutes = parse(Int, buildkite_timeout_minutes_string)::Int const cleanup_minutes = 15 const ctest_timeout_minutes = buildkite_timeout_minutes - cleanup_minutes if ctest_timeout_minutes < 1 msg = "ctest_timeout_minutes must be strictly positive" @error( msg, ctest_timeout_minutes, buildkite_timeout_minutes, cleanup_minutes, ) throw(ErrorException(msg)) end @info( "", build_number, job_name, commit_full, commit_short, ctest_timeout_minutes, buildkite_timeout_minutes, cleanup_minutes, ) const my_archives_dir = joinpath(pwd(), "my_archives_dir") const my_temp_parent_dir = joinpath(pwd(), "my_temp_parent_dir") mkpath(my_archives_dir) mkpath(my_temp_parent_dir) const TMPDIR = mktempdir(my_temp_parent_dir) proc = nothing mktempdir(my_temp_parent_dir) do dir Pkg.activate(dir) Pkg.add("Zstd_jll") zstd_jll = Base.require(Base.PkgId(Base.UUID("3161d3a3-bdf6-5164-811a-617609db77b4"), "Zstd_jll")) # zstdmt(func) = Base.invokelatest(zstd_jll.zstdmt, func; adjust_LIBPATH=false) zstdmt(func) = Base.invokelatest(zstd_jll.zstdmt, func) new_env = copy(ENV) new_env["TMPDIR"] = TMPDIR command = setenv(`$ARGS`, new_env) global proc = run(command, (stdin, stdout, stderr); wait = false) # Start asynchronous timer that will kill `ctest` @async begin sleep(ctest_timeout_minutes * 60) # If we've exceeded the timeout and `ctest` is still running, kill it if isopen(proc) @error( string( "Process timed out ", "(with a timeout of $(ctest_timeout_minutes) minutes). ", "Killing with SIGTERM.", ) ) kill(proc, Base.SIGTERM) end end # Wait for `ctest` to finish, either through naturally finishing its run, or `SIGTERM` wait(proc) if proc.termsignal != 0 @info "Command signalled $(proc.termsignal)" else @info "Command returned $(proc.exitcode)" end date_str = Dates.format(Dates.now(), Dates.dateformat"yyyy_mm_dd_HH_MM_SS") artifact_specifications = [ ("TMPDIR", TMPDIR), ] for (artifact_name, artifact_input_dir) in artifact_specifications dst_file_name = string( artifact_name, "--build_$(build_number)", "--$(job_name)", "--commit_$(commit_short)", "--$(date_str)", ".tar.zst", ) dst_full_path = joinpath(my_archives_dir, dst_file_name) run(`find . -type p`) # list the named pipes before we delete them run(`find . -type p -delete`) run(`find . -type s`) # list the sockets before we delete them run(`find . 
-type s -delete`) zstdmt() do zstdp tarproc = open(`$(zstdp) -o $(dst_full_path)`, "w") Tar.create(artifact_input_dir, tarproc) close(tarproc.in) end buildkite_upload_cmd = `buildkite-agent artifact upload $(dst_file_name)` if !success(proc) run(setenv(buildkite_upload_cmd; dir = my_archives_dir)) end end end my_exit(proc) rr-5.7.0/.buildkite/lib/000077500000000000000000000000001450675474200147645ustar00rootroot00000000000000rr-5.7.0/.buildkite/lib/common.jl000066400000000000000000000003561450675474200166070ustar00rootroot00000000000000include(joinpath(@__DIR__, "types.jl")) function get_yaml_path(platform::Platform) lib_dir = @__DIR__ buildkite_dir = dirname(lib_dir) yaml_path = joinpath(buildkite_dir, "test-$(platform.arch).yml") return yaml_path end rr-5.7.0/.buildkite/lib/generate.jl000066400000000000000000000062251450675474200171120ustar00rootroot00000000000000import YAML include(joinpath(@__DIR__, "common.jl")) function generate(platform::Platform) commands = """ echo "--- Print kernel information" uname -a echo "--- Print CPU information" # These machines have multiple cores. However, it should be sufficient to # just print the information for one of the cores. sed -n '1,/^\$\$/p' /proc/cpuinfo if [[ "$(platform.arch)" == "aarch64" ]]; then echo "--- Patch glibc host environment" curl -LO https://github.com/JuliaBinaryWrappers/DebianGlibc_jll.jl/releases/download/DebianGlibc-v2.33.0%2B1/DebianGlibc.v2.33.0.aarch64-linux-gnu.tar.gz tar -C / -xf DebianGlibc.v2.33.0.aarch64-linux-gnu.tar.gz fi echo "--- Generate build environment" cmake --version rm -rf obj mkdir obj cd obj cmake .. echo "--- Build" make --output-sync -j\$\${JULIA_CPU_THREADS:?} echo "--- Test" mkdir -p Testing/Temporary mv ../.buildkite/CTestCostData.txt Testing/Temporary if bin/rr record bin/simple; then julia ../.buildkite/capture_tmpdir.jl ctest --output-on-failure -j\$\$(expr \$\${JULIA_CPU_THREADS:?} - 2) else echo -n -e "rr seems not able to run, skipping running test suite.\nhostname: " hostname exit 1 fi """ job_label = "Test $(platform.arch)" job_key = "test-$(platform.arch)" yaml = Dict( "steps" => [ Dict( "label" => job_label, "key" => job_key, "timeout_in_minutes" => 45, "agents" => Dict( "sandbox_capable" => "true", "queue" => "juliaecosystem", "arch" => "$(platform.arch)", "os" => "linux", ), "commands" => commands, "plugins" => [ Dict( "JuliaCI/julia#v1" => Dict( "persist_depot_dirs" => "packages,artifacts,compiled", "version" => "1.7", ), ), Dict( "staticfloat/sandbox#v1" => Dict( "rootfs_treehash" => "$(platform.rootfs_treehash)", "verbose" => true, "rootfs_url" => "https://github.com/JuliaCI/rootfs-images/releases/download/$(platform.rootfs_tag)/rr.$(platform.arch).tar.gz", "workspaces" => ["/cache:/cache"], ), ), ], "soft_fail" => "$(platform.allow_fail)", "retry" => Dict("manual" => Dict("permit_on_passed" => true)) ), ], ) if platform.commit_status let notify = [ Dict( "github_commit_status" => Dict( "context" => job_key, ), ), ] only(yaml["steps"])["notify"] = notify end end yaml_path = get_yaml_path(platform) rm(yaml_path; force = true) YAML.write_file(yaml_path, yaml) end generate.(platforms) rr-5.7.0/.buildkite/lib/launch.jl000066400000000000000000000003471450675474200165710ustar00rootroot00000000000000include(joinpath(@__DIR__, "common.jl")) function launch(platform::Platform) yaml_path = get_yaml_path(platform) cmd = `buildkite-agent pipeline upload $(yaml_path)` run(cmd) return nothing end launch.(platforms) 
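Taken together, lib/types.jl (next file), lib/common.jl, lib/generate.jl and lib/launch.jl form a small meta-pipeline: each Platform record is expanded into a per-architecture test-<arch>.yml, which launch.jl then hands to `buildkite-agent pipeline upload`. The following self-contained Julia sketch illustrates that flow under stated assumptions: the Platform fields and the YAML.write_file call mirror the files above, but the reduced step Dict, the yaml_path helper's extra dir parameter, and the dry-run println are stand-ins for the real generate/launch logic, not part of the tree.

import YAML

Base.@kwdef struct Platform
    arch::String
    rootfs_tag::String
end

# One generated pipeline file per architecture, as in common.jl.
yaml_path(p::Platform, dir::String) = joinpath(dir, "test-$(p.arch).yml")

dir = mktempdir()
for p in [Platform(arch = "x86_64", rootfs_tag = "v5.22"),
          Platform(arch = "aarch64", rootfs_tag = "v5.22")]
    path = yaml_path(p, dir)
    # Drastically reduced stand-in for the Dict that generate.jl builds.
    YAML.write_file(path, Dict("steps" => [Dict("label" => "Test $(p.arch)")]))
    # launch.jl would run the upload; print it instead to keep this a dry run.
    println("would run: buildkite-agent pipeline upload $(path)")
end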
rr-5.7.0/.buildkite/lib/types.jl000066400000000000000000000017111450675474200164570ustar00rootroot00000000000000Base.@kwdef struct Platform arch::String rootfs_tag::String rootfs_treehash::String allow_fail::Bool commit_status::Bool end struct Platforms ps::Vector{Platform} end Base.length(platforms::Platforms) = Base.length(platforms.ps) Base.iterate(platforms::Platforms) = Base.iterate(platforms.ps) Base.iterate(platforms::Platforms, state) = Base.iterate(platforms.ps, state) const platforms = Platforms( [ Platform(; arch = "x86_64", rootfs_tag = "v5.22", rootfs_treehash = "1cd67e278881dcfeed695282256b26fad603e15d", allow_fail = false, commit_status = true, ), Platform(; arch = "aarch64", rootfs_tag = "v5.22", rootfs_treehash = "7a63218e46996b36aa108b55746a3d94a3e312c1", allow_fail = false, commit_status = true, ), ] ) rr-5.7.0/.buildkite/pipeline.yml000066400000000000000000000013201450675474200165420ustar00rootroot00000000000000steps: - label: "Launch jobs" plugins: - JuliaCI/julia#v1: persist_depot_dirs: packages,artifacts,compiled version: '1.7' timeout_in_minutes: 15 agents: queue: "juliaecosystem" os: "linux" commands: | echo "--- Instantiate the environment" julia --project=.buildkite -e 'import Pkg; Pkg.instantiate()' echo "--- Precompile the environment" julia --project=.buildkite -e 'import Pkg; Pkg.precompile()' echo "--- Generate the Buildkite pipeline YAML files" julia --project=.buildkite .buildkite/lib/generate.jl echo "--- Upload the Buildkite pipeline YAML files" julia --project=.buildkite .buildkite/lib/launch.jl rr-5.7.0/.clang-format000066400000000000000000000025561450675474200145470ustar00rootroot00000000000000# BasedOnStyle: Mozilla AccessModifierOffset: -2 ConstructorInitializerIndentWidth: 4 AlignEscapedNewlinesLeft: false AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: false AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakTemplateDeclarations: false AlwaysBreakBeforeMultilineStrings: false BreakBeforeBinaryOperators: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BinPackParameters: true ColumnLimit: 80 ConstructorInitializerAllOnOneLineOrOnePerLine: true DerivePointerBinding: false ExperimentalAutoDetectBinPacking: false IndentCaseLabels: true MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 60 PenaltyBreakString: 1000 PenaltyBreakFirstLessLess: 120 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerBindsToType: true SpacesBeforeTrailingComments: 1 Cpp11BracedListStyle: false Standard: Cpp11 IndentWidth: 2 TabWidth: 8 UseTab: Never BreakBeforeBraces: Attach IndentFunctionDeclarationAfterType: false SpacesInParentheses: false SpacesInAngles: false SpaceInEmptyParentheses: false SpacesInCStyleCastParentheses: false SpaceAfterControlStatementKeyword: true SpaceBeforeAssignmentOperators: true ContinuationIndentWidth: 4 rr-5.7.0/.github/000077500000000000000000000000001450675474200135245ustar00rootroot00000000000000rr-5.7.0/.github/workflows/000077500000000000000000000000001450675474200155615ustar00rootroot00000000000000rr-5.7.0/.github/workflows/build.yml000066400000000000000000000011711450675474200174030ustar00rootroot00000000000000name: Build on: [push, pull_request] jobs: build: name: Build runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Build Dockerfile shell: bash run: docker build -t rr-android .android - name: Create dist 
dir shell: bash run: mkdir -p obj/dist - name: Build RR shell: bash run: | docker run --rm \ -v $(pwd):/src/rr \ -v $(pwd)/obj/dist:/dist \ rr-android - uses: actions/upload-artifact@v3 with: name: rr path: obj/dist/rr-*-Android-x86_64.tar.gz rr-5.7.0/.gitignore000066400000000000000000000006221450675474200141540ustar00rootroot00000000000000*~ .cache .cproject CMakeCache.txt CMakeFiles/ cmake_install.cmake CPackConfig.cmake CPackSourceConfig.cmake _CPack_Packages/ CTestTestfile.cmake compile_commands.json Debug dist/ install_manifest.txt Makefile Profile obj/ .project .vscode/ *.log *.orig *.rej *.pyc *.record *.replay .settings/ Testing/ .idea/ .*.swp *generated build/ bin/ lib/ share/ libbrotli* extra_version_string.h git_revision.h rr-5.7.0/CMakeLists.txt000066400000000000000000001726611450675474200147410ustar00rootroot00000000000000# *-* Mode: cmake; *-* cmake_minimum_required(VERSION 3.5) project(rr C CXX ASM) # "Do not add flags to export symbols from executables without the ENABLE_EXPORTS target property." # This avoids linking executables with -rdynamic. -rdynamic has been observed # to cause rr_exec_stub to be linked with the dynamic linker with some # version(s) of clang (but linked to an incorrect file name, causing # exec of rr_exec_stub to fail). if(POLICY CMP0065) cmake_policy(SET CMP0065 NEW) endif() # On single configuration generators, make Debug the default configuration if(NOT CMAKE_CONFIGURATION_TYPES) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Whether to build in `Debug` or `Release` mode." FORCE) endif() endif() enable_testing() set(BUILD_SHARED_LIBS ON) set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/rr) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(BUILD_TESTS ON CACHE BOOL "Build tests") set(WILL_RUN_TESTS ${BUILD_TESTS} CACHE BOOL "Run tests") option(INSTALL_TESTSUITE "Install the testsuite") # CAREFUL! "-" is an invalid character in RPM package names, while # debian is happy with it. However, "_" is illegal in debs, while RPM # is cool with it. Sigh. 
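# A plain dotted MAJOR.MINOR.PATCH version, as set below, is the one form
# that both packaging systems accept.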
set(rr_VERSION_MAJOR 5) set(rr_VERSION_MINOR 7) set(rr_VERSION_PATCH 0) if(ANDROID) find_package(CapnProto REQUIRED) endif() add_definitions(-DRR_VERSION="${rr_VERSION_MAJOR}.${rr_VERSION_MINOR}.${rr_VERSION_PATCH}") execute_process( COMMAND git rev-parse HEAD WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GIT_REVISION OUTPUT_STRIP_TRAILING_WHITESPACE ) configure_file( ${CMAKE_SOURCE_DIR}/src/git_revision.h.in ${CMAKE_BINARY_DIR}/git_revision.h ) configure_file( ${CMAKE_SOURCE_DIR}/src/extra_version_string.h.in ${CMAKE_BINARY_DIR}/extra_version_string.h ) set(FLAGS_COMMON "-D__USE_LARGEFILE64 -pthread") set(supports32bit true) set(x86ish false) set(has_syscallbuf false) if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") set(has_syscallbuf true) set(supports32bit false) set(FLAGS_COMMON "${FLAGS_COMMON} -march=armv8.3-a -moutline-atomics") set(PRELOAD_LIBRARY_PAGE_SIZE 65536) set(VDSO_NAME "LINUX_2.6.39") else() set(x86ish true) set(has_syscallbuf true) set(FLAGS_COMMON "${FLAGS_COMMON} -msse2 -D__MMX__ -D__SSE__ -D__SSE2__") set(PRELOAD_LIBRARY_PAGE_SIZE 4096) set(VDSO_NAME "LINUX_2.6") endif() configure_file(src/preload/rr_page.ld.in src/preload/rr_page.ld @ONLY) include(CheckCCompilerFlag) CHECK_C_COMPILER_FLAG("-fmacro-prefix-map=foo=bar" SUPPORTS_MACRO_PREFIX_MAP) if (SUPPORTS_MACRO_PREFIX_MAP) set(FLAGS_COMMON "${FLAGS_COMMON} -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11") # Define __STDC_LIMIT_MACROS so |#include | works as expected. # Define __STDC_FORMAT_MACROS so |#include | works as expected. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++14") # We support three build types: # DEBUG: suitable for debugging rr # RELEASE: suitable for using rr in production (but keeps rr debuginfo) # OTHER: suitable for using rr in production, but honouring distro/user opt/debug settings # (which we assume are suitable for production use) # Base settings for debug and release/unspecified builds. # Use -Werror for debug builds because we assume a developer is building, not a user. set(RR_FLAGS_DEBUG "-Wall -Wextra -DDEBUG -UNDEBUG") set(RR_FLAGS_RELEASE "-Wall -Wextra -UDEBUG -DNDEBUG") # The following settings are the defaults for the OTHER build type. # Flags used to build the preload library. MUST have debuginfo enabled. SHOULD be optimized. set(PRELOAD_COMPILE_FLAGS "${RR_FLAGS_RELEASE} -fno-stack-protector -g3 -U_FORTIFY_SOURCE") # Flags used to build Brotli. SHOULD be optimized. MUST NOT error on warnings. set(BROTLI_COMPILE_FLAGS ${RR_FLAGS_RELEASE}) # Flags used to build tests. MUST have -DDEBUG and debuginfo enabled, MUST NOT be optimized. set(RR_TEST_FLAGS "${RR_FLAGS_DEBUG} -g3 -O0") # Flags used to build other files. Entirely build-type-dependent. set(RR_FLAGS ${RR_FLAGS_RELEASE}) # Now override for build type. 
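# The build type is selected at configure time; a hypothetical invocation
# (adjust the source path to suit) would be:
#   cmake -DCMAKE_BUILD_TYPE=Release /path/to/rr
# Any value other than Debug or Release leaves the OTHER defaults above in place.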
string(TOLOWER ${CMAKE_BUILD_TYPE} LOWERCASE_CMAKE_BUILD_TYPE) if(LOWERCASE_CMAKE_BUILD_TYPE STREQUAL "debug") set(PRELOAD_COMPILE_FLAGS "${PRELOAD_COMPILE_FLAGS} -O2 -Werror") set(BROTLI_COMPILE_FLAGS "${RR_FLAGS_RELEASE} -O2") set(RR_TEST_FLAGS "${RR_TEST_FLAGS} -Werror") set(RR_FLAGS "${RR_FLAGS_DEBUG} -g3 -Werror") elseif(LOWERCASE_CMAKE_BUILD_TYPE STREQUAL "release") # CMake itself will add optimization flags set(RR_FLAGS "${RR_FLAGS_RELEASE} -g3 -flto") endif() set(LINKER_FLAGS "") if(CMAKE_C_COMPILER_ID STREQUAL "GNU") # Gcc generates bogus R_386_GOTOFF relocations in .debug_info which # lld 9 rejects set(LINKER_FLAGS "-fuse-ld=bfd") endif() if(CMAKE_C_COMPILER_ID STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-command-line-argument") endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-command-line-argument") endif() # -fno-integrated-as tells Clang to use whatever "as" happens to be. For an # Android build that will end up being whatever /usr/bin/as is, and whatever it # is, it's the wrong assembler for Android, because Android only supports the # Clang assembler. if (CMAKE_ASM_COMPILER_ID STREQUAL "Clang" AND NOT ANDROID) set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -fno-integrated-as") endif() option(force32bit "Force a 32-bit rr build, rather than both 64 and 32-bit. rr will only be able to record and replay 32-bit processes.") option(disable32bit "On a 64-bit platform, avoid requiring a 32-bit cross-compilation toolchain by not building 32-bit components. rr will be able to record 32-bit processes but not replay them.") if(force32bit) set(rr_32BIT true) set(rr_64BIT false) set(rr_MBITNESS_OPTION -m32) else() if(CMAKE_SIZEOF_VOID_P EQUAL 8) if(disable32bit OR NOT supports32bit) set(rr_32BIT false) else() set(rr_32BIT true) endif() set(rr_64BIT true) else() set(rr_32BIT true) set(rr_64BIT false) endif() set(rr_MBITNESS_OPTION) endif() option(staticlibs "Force usage of static linkage for non-standard libraries like capnproto") # Check that compiling 32-bit code on a 64-bit target works, if required. if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" AND rr_32BIT) # try_compile won't accept LINK_FLAGS, so do this manually. file(WRITE "${CMAKE_BINARY_DIR}/test32.c" "int main() { return 0; }") execute_process(COMMAND ${CMAKE_C_COMPILER} -o ${CMAKE_BINARY_DIR}/test32 ${CMAKE_BINARY_DIR}/test32.c -m32 RESULT_VARIABLE COMPILER_32BIT_RESULT) if(NOT (COMPILER_32BIT_RESULT EQUAL 0)) message(FATAL_ERROR "Your toolchain doesn't support 32-bit cross-compilation. Install the required packages or pass -Ddisable32bit=ON to cmake.") endif() endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${rr_MBITNESS_OPTION}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${rr_MBITNESS_OPTION}") set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${rr_MBITNESS_OPTION}") # If SKIP_PKGCONFIG is set then ${PKG}_CFLAGS and ${PKG}_LDFLAGS must be # provided as well. if(NOT SKIP_PKGCONFIG) find_package(PkgConfig REQUIRED) # If we're cross-compiling a 32-bit rr build on a 64-bit host we need # to ensure we're looking for the right libraries. # This has been tested on Ubuntu and Fedora. if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT rr_64BIT) set(LIBDIR32_CANDIDATES /usr/lib/i386-linux-gnu/pkgconfig/ /usr/lib/pkgconfig/ ) foreach(libdir ${LIBDIR32_CANDIDATES}) if(IS_DIRECTORY ${libdir}) set(ENV{PKG_CONFIG_LIBDIR} ${libdir}) break() endif() endforeach(libdir) if(NOT DEFINED ENV{PKG_CONFIG_LIBDIR}) message(FATAL_ERROR "Couldn't find a suitable 32-bit pkgconfig lib dir. 
You probably need to install a 32-bit pkgconfig package (pkgconfig.i686 for Fedora or pkg-config:i386 for Ubuntu") endif() endif() endif() find_program(CAPNP capnp) if(${CAPNP} STREQUAL "CAPNP-NOTFOUND") message(FATAL_ERROR "Can't find 'capnp' command; install Capnproto packages? https://github.com/rr-debugger/rr/wiki/Building-And-Installing#tldr") endif() set(REQUIRED_LIBS capnp # zlib is required to handle ELF compression zlib ) foreach(required_lib ${REQUIRED_LIBS}) string(TOUPPER ${required_lib} PKG) if(NOT SKIP_PKGCONFIG) pkg_check_modules(${PKG} REQUIRED ${required_lib}) endif() if(staticlibs) string(REPLACE ";" " " ${PKG}_STATIC_CFLAGS "${${PKG}_STATIC_CFLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${${PKG}_STATIC_CFLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${${PKG}_STATIC_CFLAGS}") else() string(REPLACE ";" " " ${PKG}_CFLAGS "${${PKG}_CFLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${${PKG}_CFLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${${PKG}_CFLAGS}") endif() endforeach(required_lib) # ==== brotli ==== set(BROTLI_FILES third-party/brotli/common/constants.c third-party/brotli/common/context.c third-party/brotli/common/dictionary.c third-party/brotli/common/platform.c third-party/brotli/common/shared_dictionary_internal.h third-party/brotli/common/shared_dictionary.c third-party/brotli/common/transform.c third-party/brotli/dec/bit_reader.c third-party/brotli/dec/decode.c third-party/brotli/dec/huffman.c third-party/brotli/dec/state.c third-party/brotli/enc/backward_references.c third-party/brotli/enc/backward_references.h third-party/brotli/enc/backward_references_hq.c third-party/brotli/enc/backward_references_hq.h third-party/brotli/enc/backward_references_inc.h third-party/brotli/enc/bit_cost.c third-party/brotli/enc/bit_cost.h third-party/brotli/enc/bit_cost_inc.h third-party/brotli/enc/block_encoder_inc.h third-party/brotli/enc/block_splitter.c third-party/brotli/enc/block_splitter.h third-party/brotli/enc/block_splitter_inc.h third-party/brotli/enc/brotli_bit_stream.c third-party/brotli/enc/brotli_bit_stream.h third-party/brotli/enc/command.c third-party/brotli/enc/cluster.c third-party/brotli/enc/cluster.h third-party/brotli/enc/cluster_inc.h third-party/brotli/enc/command.h third-party/brotli/enc/compress_fragment.c third-party/brotli/enc/compress_fragment.h third-party/brotli/enc/compound_dictionary.c third-party/brotli/enc/compound_dictionary.h third-party/brotli/enc/compress_fragment_two_pass.c third-party/brotli/enc/compress_fragment_two_pass.h third-party/brotli/enc/dictionary_hash.c third-party/brotli/enc/dictionary_hash.h third-party/brotli/enc/encode.c third-party/brotli/enc/encoder_dict.c third-party/brotli/enc/entropy_encode.c third-party/brotli/enc/entropy_encode.h third-party/brotli/enc/entropy_encode_static.h third-party/brotli/enc/fast_log.c third-party/brotli/enc/fast_log.h third-party/brotli/enc/find_match_length.h third-party/brotli/enc/hash_forgetful_chain_inc.h third-party/brotli/enc/hash.h third-party/brotli/enc/hash_longest_match64_inc.h third-party/brotli/enc/hash_longest_match_inc.h third-party/brotli/enc/hash_longest_match_quickly_inc.h third-party/brotli/enc/hash_to_binary_tree_inc.h third-party/brotli/enc/histogram.c third-party/brotli/enc/histogram.h third-party/brotli/enc/histogram_inc.h third-party/brotli/enc/literal_cost.c third-party/brotli/enc/literal_cost.h third-party/brotli/enc/memory.c third-party/brotli/enc/memory.h third-party/brotli/enc/metablock.c third-party/brotli/enc/metablock.h third-party/brotli/enc/metablock_inc.h 
third-party/brotli/enc/prefix.h third-party/brotli/enc/quality.h third-party/brotli/enc/ringbuffer.h third-party/brotli/enc/state.h third-party/brotli/enc/static_dict.c third-party/brotli/enc/static_dict.h third-party/brotli/enc/static_dict_lut.h third-party/brotli/enc/utf8_util.c third-party/brotli/enc/utf8_util.h third-party/brotli/enc/write_bits.h third-party/brotli/include/brotli/decode.h third-party/brotli/include/brotli/encode.h third-party/brotli/include/brotli/port.h third-party/brotli/include/brotli/shared_dictionary.h ) add_library(brotli STATIC ${BROTLI_FILES}) set_source_files_properties(${BROTLI_FILES} PROPERTIES COMPILE_FLAGS ${BROTLI_COMPILE_FLAGS}) # ==== brotli ==== find_library(LIBRT rt) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS OFF) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS ON) find_library(LIBRT_32 rt PATHS "/usr/lib32" "/usr/lib" NO_DEFAULT_PATH) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS OFF) find_path(SECCOMP NAMES "linux/seccomp.h") if(NOT SECCOMP) message(FATAL_ERROR "Couldn't find linux/seccomp.h. You may need to upgrade your kernel.") endif() find_path(PROC_SERVICE_H NAMES "proc_service.h") if(PROC_SERVICE_H) add_definitions(-DPROC_SERVICE_H=1) else() message(AUTHOR_WARNING "proc_service.h not present. Support for libthread_db.so is disabled.") endif() # Test only includes find_path(MQUEUE_H NAMES "mqueue.h") if(MQUEUE_H) add_definitions(-DMQUEUE_H=1) endif() find_path(FANOTIFY_H NAMES "sys/fanotify.h") if(FANOTIFY_H) add_definitions(-DFANOTIFY_H=1) endif() include(CheckSymbolExists) list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) check_symbol_exists(LAV_CURRENT "link.h" RTLD_AUDIT) if(NOT RTLD_AUDIT) message(AUTHOR_WARNING "Couldn't find rtld-audit support. librraudit skipped.") endif() list(REMOVE_ITEM CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) check_symbol_exists(backtrace "execinfo.h" EXECINFO_BACKTRACE) if(EXECINFO_BACKTRACE) add_definitions(-DEXECINFO_BACKTRACE) else() message(AUTHOR_WARNING "backtrace(3) not present in execinfo.h. Automatic backtraces for failures in rr are disabled.") endif() # Test only symbols check_symbol_exists(pthread_mutexattr_setrobust "pthread.h" HAVE_ROBUST_MUTEX) set(Python_ADDITIONAL_VERSIONS 3 3.8 3.7 3.6 3.5 3.4 3.3 3.2 3.1 3.0) find_package(PythonInterp 3 REQUIRED) execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "# nothing" RESULT_VARIABLE python_status) if(python_status) message(FATAL_ERROR "Couldn't run python interpreter ${PYTHON_EXECUTABLE}.") endif() # Check for required Python modules if(WILL_RUN_TESTS) if(NOT BUILD_TESTS) message(FATAL_ERROR "Running tests requires building them") endif() set(REQUIRED_PYTHON_MODULES pexpect ) else() set(REQUIRED_PYTHON_MODULES) endif() foreach(py_module ${REQUIRED_PYTHON_MODULES}) execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${py_module}" RESULT_VARIABLE module_status) if(module_status) message(FATAL_ERROR "Couldn't find required Python module ${py_module}.") endif() endforeach(py_module) if(WILL_RUN_TESTS) # Check for gdb execute_process(COMMAND "gdb" "--version" RESULT_VARIABLE module_status OUTPUT_QUIET) if(module_status) message(FATAL_ERROR "Couldn't find gdb.") endif() endif() include_directories("${PROJECT_SOURCE_DIR}/include") include_directories("${PROJECT_SOURCE_DIR}/third-party/proc-service") include_directories("${PROJECT_SOURCE_DIR}/third-party/brotli/include") # We need to know where our generated files are. 
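# (generate_syscalls.py and the capnp compiler write them into
# ${CMAKE_CURRENT_BINARY_DIR}; see the Generated target and the rr_trace.capnp
# rule below.)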
include_directories("${CMAKE_CURRENT_BINARY_DIR}") set(RR_PAGE_FILES rr_page.S ) set(RR_PAGE_SOURCE_FILES ${RR_PAGE_FILES} rr_page_instructions.S rr_vdso.S ) add_library(rrpage) foreach(file ${RR_PAGE_FILES}) target_sources(rrpage PUBLIC "${CMAKE_SOURCE_DIR}/src/preload/${file}") set_source_files_properties("${CMAKE_SOURCE_DIR}/src/preload/${file}" PROPERTIES COMPILE_FLAGS ${PRELOAD_COMPILE_FLAGS}) endforeach(file) # Since librrpage replaces the kernel vDSO for processes exec'd by rr, # we want it to have the same SONAME as the real vDSO to trick things # like AddressSanitizer into recognising it as the vDSO. set_target_properties(rrpage PROPERTIES NO_SONAME ON) set_target_properties(rrpage PROPERTIES LINK_FLAGS "-Wl,-T -Wl,${CMAKE_BINARY_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib -Wl,-z,max-page-size=${PRELOAD_LIBRARY_PAGE_SIZE} -Wl,-soname,linux-vdso.so.1 ${LINKER_FLAGS}") set_target_properties(rrpage PROPERTIES LINK_DEPENDS ${CMAKE_BINARY_DIR}/src/preload/rr_page.ld) # CMake seems to have trouble generating the link line without this set_target_properties(rrpage PROPERTIES LINKER_LANGUAGE C) add_custom_command(TARGET rrpage POST_BUILD COMMAND ${CMAKE_SOURCE_DIR}/src/preload/tweak_librrpage.py $ ${PRELOAD_LIBRARY_PAGE_SIZE}) # Order matters here! syscall_hook.S must be immediately before syscallbuf.c, # raw_syscall.S must be before overrides.c, which must be last. if(has_syscallbuf) set(PRELOAD_FILES syscall_hook.S syscallbuf.c raw_syscall.S overrides.c ) else() set(PRELOAD_FILES overrides.c ) endif() set(PRELOAD_SOURCE_FILES ${PRELOAD_FILES} preload_interface.h rrcalls.h syscallbuf.h ) add_library(rrpreload) foreach(file ${PRELOAD_FILES}) target_sources(rrpreload PUBLIC "${CMAKE_SOURCE_DIR}/src/preload/${file}") set_source_files_properties("${CMAKE_SOURCE_DIR}/src/preload/${file}" PROPERTIES COMPILE_FLAGS ${PRELOAD_COMPILE_FLAGS}) endforeach(file) set_target_properties(rrpreload PROPERTIES LINK_FLAGS "-nostartfiles ${LINKER_FLAGS}") set_target_properties(rrpreload PROPERTIES INSTALL_RPATH "\$ORIGIN") if(RTLD_AUDIT) set(AUDIT_FILES rtld-audit.c stap-note-iter.c ../preload/raw_syscall.S ) set(AUDIT_SOURCE_FILES ${AUDIT_FILES} rtld-audit.h stap-note-iter.h ../preload/preload_interface.h ../preload/rrcalls.h ) add_library(rraudit) foreach(file ${AUDIT_FILES}) target_sources(rraudit PUBLIC "${CMAKE_SOURCE_DIR}/src/audit/${file}") set_source_files_properties("${CMAKE_SOURCE_DIR}/src/audit/${file}" PROPERTIES COMPILE_FLAGS ${PRELOAD_COMPILE_FLAGS}) endforeach(file) set_target_properties(rraudit PROPERTIES LINK_FLAGS "-nostartfiles -ldl ${LINKER_FLAGS}") endif() # Ensure that CMake knows about our generated files. # # Alphabetical, please. 
set(GENERATED_FILES AssemblyTemplates.generated CheckSyscallNumbers.generated SyscallEnumsX64.generated SyscallEnumsX86.generated SyscallEnumsGeneric.generated SyscallEnumsForTestsX64.generated SyscallEnumsForTestsX86.generated SyscallEnumsForTestsGeneric.generated SyscallHelperFunctions.generated SyscallnameArch.generated SyscallRecordCase.generated ) foreach(generated_file ${GENERATED_FILES}) set_source_files_properties(${generated_file} PROPERTIES GENERATED true HEADER_FILE_ONLY true) add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated_file}" COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_syscalls.py" "${CMAKE_CURRENT_BINARY_DIR}/${generated_file}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_syscalls.py" "${CMAKE_CURRENT_SOURCE_DIR}/src/syscalls.py" "${CMAKE_CURRENT_SOURCE_DIR}/src/assembly_templates.py") endforeach(generated_file) add_custom_target(Generated DEPENDS ${GENERATED_FILES}) add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rr_trace.capnp.c++" "${CMAKE_CURRENT_BINARY_DIR}/rr_trace.capnp.h" COMMAND capnp compile "--src-prefix=${CMAKE_CURRENT_SOURCE_DIR}/src" "-oc++:${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/src/rr_trace.capnp" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/rr_trace.capnp") set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/rr_trace.capnp.c++" PROPERTIES GENERATED true) set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/rr_trace.capnp.h" PROPERTIES GENERATED true HEADER_FILE_ONLY true) if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") set(BLAKE_ARCH_DIR third-party/blake2/neon) else() set(BLAKE_ARCH_DIR third-party/blake2/sse) endif() set(RR_SOURCES src/AddressSpace.cc src/AutoRemoteSyscalls.cc src/BuildidCommand.cc src/Command.cc src/CompressedReader.cc src/CompressedWriter.cc src/CPUFeaturesCommand.cc src/CPUIDBugDetector.cc src/DiversionSession.cc src/DumpCommand.cc src/Dwarf.cc src/ElfReader.cc src/EmuFs.cc src/Event.cc src/ExtraRegisters.cc src/fast_forward.cc src/FdTable.cc src/FileMonitor.cc src/FileNameCommand.cc src/Flags.cc src/ftrace.cc src/GdbCommand.cc src/GdbCommandHandler.cc src/GdbConnection.cc src/GdbExpression.cc src/GdbInitCommand.cc src/GdbServer.cc src/HasTaskSet.cc src/HelpCommand.cc src/ExportImportCheckpoints.cc src/kernel_abi.cc src/kernel_metadata.cc src/log.cc src/LsCommand.cc src/main.cc src/MagicSaveDataMonitor.cc src/MmappedFileMonitor.cc src/MonitoredSharedMemory.cc src/Monkeypatcher.cc src/MvCommand.cc src/PackCommand.cc src/PerfCounters.cc src/PidFdMonitor.cc src/ProcFdDirMonitor.cc src/ProcMemMonitor.cc src/ProcStatMonitor.cc src/PsCommand.cc src/RecordCommand.cc src/RecordSession.cc src/record_signal.cc src/record_syscall.cc src/RecordTask.cc src/Registers.cc src/remote_code_ptr.cc src/ReplayCommand.cc src/ReplaySession.cc src/replay_syscall.cc src/ReplayTask.cc src/ReplayTimeline.cc src/RerunCommand.cc src/ReturnAddressList.cc src/RmCommand.cc src/Scheduler.cc src/SeccompFilterRewriter.cc src/Session.cc src/SourcesCommand.cc src/StdioMonitor.cc src/SysCpuMonitor.cc src/Task.cc src/ThreadGroup.cc src/TraceeAttentionSet.cc src/TraceFrame.cc src/TraceInfoCommand.cc src/TraceStream.cc src/VirtualPerfCounterMonitor.cc src/util.cc src/WaitManager.cc src/WaitStatus.cc ${CMAKE_CURRENT_BINARY_DIR}/rr_trace.capnp.c++ ${BLAKE_ARCH_DIR}/blake2b.c ) if(PROC_SERVICE_H) set(RR_SOURCES ${RR_SOURCES} src/ThreadDb.cc) endif() if (x86ish) set(RR_SOURCES ${RR_SOURCES} src/test/x86/cpuid_loop.S) endif() option(asan "Build with address sanitizer enabled.") if (asan) set(ASAN_FLAGS 
"-fsanitize=address -fno-omit-frame-pointer") # Without no-omit-frame-pointer incomplete backtraces get stored. set(RR_FLAGS "${ASAN_FLAGS} ${RR_FLAGS}") endif() set_source_files_properties(${RR_SOURCES} PROPERTIES COMPILE_FLAGS ${RR_FLAGS}) function(post_build_executable target) # grsecurity needs these. But if we add them ourselves, they may conflict # with other flags added in other ways, and they all have to match :-(. So # don't do this until a better solution presents itself # add_custom_command(TARGET ${target} # POST_BUILD # COMMAND setfattr ARGS -n user.pax.flags -v m $) endfunction(post_build_executable) if(UNIX) include(GNUInstallDirs) else() set(CMAKE_INSTALL_LIBDIR "lib") set(CMAKE_INSTALL_BINDIR "bin") set(CMAKE_INSTALL_DATADIR "share") set(CMAKE_INSTALL_DOCDIR "${CMAKE_INSTALL_DATADIR}/doc") set(CMAKE_INSTALL_INCLUDEDIR "include") endif() add_executable(rr ${RR_SOURCES}) set_target_properties(rr PROPERTIES ENABLE_EXPORTS true) post_build_executable(rr) set(RR_BIN rr) add_dependencies(rr Generated) option(strip "Strip debug info from rr binary") set(RR_MAIN_LINKER_FLAGS ${LINKER_FLAGS}) if(strip) set(RR_MAIN_LINKER_FLAGS "-s ${RR_MAIN_LINKER_FLAGS}") endif() if (asan) set(RR_MAIN_LINKER_FLAGS " ${ASAN_FLAGS} ${RR_MAIN_LINKER_FLAGS}") endif() # Add -flto option to linking step if release if(LOWERCASE_CMAKE_BUILD_TYPE STREQUAL "release") CHECK_C_COMPILER_FLAG("-flto=auto" SUPPORTS_LTO_AUTO) if(SUPPORTS_LTO_AUTO) set(RR_MAIN_LINKER_FLAGS "${RR_MAIN_LINKER_FLAGS} -flto=auto") else() set(RR_MAIN_LINKER_FLAGS "${RR_MAIN_LINKER_FLAGS} -flto") endif() endif() if(LIBRT) target_link_libraries(rr ${LIBRT}) endif() target_link_libraries(rr ${CMAKE_DL_LIBS} ${ZLIB_LDFLAGS} brotli ) if(staticlibs) # Urgh ... this might not work for everyone, but there doesn't seem to be # a way to persuade pkg-confing/pkg_check_modules to produce the right flags target_link_libraries(rr -L/home/roc/lib -l:libcapnp.a -l:libkj.a) # Note that this works for both clang++ and g++ set(RR_MAIN_LINKER_FLAGS "-static-libstdc++ ${RR_MAIN_LINKER_FLAGS}") elseif(ANDROID) target_link_libraries(rr CapnProto::capnp) else() target_link_libraries(rr ${CAPNP_LDFLAGS}) endif() set_target_properties(rr PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") target_link_libraries(rrpreload ${CMAKE_DL_LIBS} ) add_executable(rr_exec_stub src/exec_stub.c) post_build_executable(rr_exec_stub) set_target_properties(rr_exec_stub PROPERTIES LINK_FLAGS "-static -nostartfiles -nodefaultlibs ${LINKER_FLAGS}") set_source_files_properties(src/exec_stub.c COMPILE_FLAGS "-fno-stack-protector") set(RR_GDB_RESOURCES 32bit-avx.xml 32bit-core.xml 32bit-linux.xml 32bit-sse.xml 32bit-pkeys.xml 64bit-avx.xml 64bit-core.xml 64bit-linux.xml 64bit-seg.xml 64bit-sse.xml 64bit-pkeys.xml amd64-pkeys-linux.xml amd64-avx-linux.xml amd64-linux.xml i386-pkeys-linux.xml i386-avx-linux.xml i386-linux.xml aarch64-core.xml aarch64-fpu.xml aarch64-pauth.xml ) foreach(file ${RR_GDB_RESOURCES}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/third-party/gdb/${file}" "${CMAKE_CURRENT_BINARY_DIR}/share/rr/${file}" COPYONLY) install(FILES third-party/gdb/${file} DESTINATION ${CMAKE_INSTALL_DATADIR}/rr) endforeach(file) foreach(file ${PRELOAD_SOURCE_FILES}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/share/rr/src/preload/${file}" COPYONLY) install(FILES src/preload/${file} DESTINATION ${CMAKE_INSTALL_DATADIR}/rr/src/preload) endforeach(file) foreach(file ${RR_PAGE_SOURCE_FILES}) 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/share/rr/src/preload/${file}" COPYONLY) install(FILES src/preload/${file} DESTINATION ${CMAKE_INSTALL_DATADIR}/rr/src/preload) endforeach(file) configure_file("${CMAKE_CURRENT_BINARY_DIR}/src/preload/rr_page.ld" "${CMAKE_CURRENT_BINARY_DIR}/share/rr/src/preload/rr_page.ld" COPYONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/src/preload/rr_page.ld" DESTINATION ${CMAKE_INSTALL_DATADIR}/rr/src/preload) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scripts/rr-collect-symbols.py" "${CMAKE_CURRENT_BINARY_DIR}/bin/rr-collect-symbols.py" COPYONLY) install(PROGRAMS scripts/signal-rr-recording.sh scripts/rr-collect-symbols.py DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS scripts/rr_completion DESTINATION ${CMAKE_INSTALL_DATADIR}/bash-completion/completions RENAME rr) set(RR_INSTALL_LIBS rrpreload rrpage rr_exec_stub) if(RTLD_AUDIT) set(RR_INSTALL_LIBS ${RR_INSTALL_LIBS} rraudit) endif() install(TARGETS ${RR_BIN} ${RR_INSTALL_LIBS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) if(EXTRA_EXTERNAL_SOLIBS) install(PROGRAMS ${EXTRA_EXTERNAL_SOLIBS} DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() # Build 32-bit librrpreload and librraudit on 64-bit builds. # We copy the source files into '32' subdirectories in the output # directory, so we can set different compile options on them. # This sucks but I can't find a better way to get CMake to build # the same source file in two different ways. if(rr_32BIT AND rr_64BIT) set(RR_INSTALL_LIBS_32 rrpreload_32 rrpage_32 rr_exec_stub_32) add_library(rrpage_32) foreach(file ${RR_PAGE_SOURCE_FILES}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}" COPYONLY) endforeach(file) foreach(file ${RR_PAGE_FILES}) target_sources(rrpage_32 PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}") set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}" PROPERTIES COMPILE_FLAGS "-m32 ${PRELOAD_COMPILE_FLAGS}") endforeach(file) set_target_properties(rrpage_32 PROPERTIES NO_SONAME ON) set_target_properties(rrpage_32 PROPERTIES LINK_FLAGS "-m32 -Wl,-T -Wl,${CMAKE_BINARY_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib -Wl,-soname,linux-vdso.so.1 ${LINKER_FLAGS}") set_target_properties(rrpage_32 PROPERTIES LINK_DEPENDS ${CMAKE_BINARY_DIR}/src/preload/rr_page.ld) set_target_properties(rrpage_32 PROPERTIES LINKER_LANGUAGE C) add_custom_command(TARGET rrpage_32 POST_BUILD COMMAND ${CMAKE_SOURCE_DIR}/src/preload/tweak_librrpage.py $ ${PRELOAD_LIBRARY_PAGE_SIZE}) add_library(rrpreload_32) foreach(file ${PRELOAD_SOURCE_FILES}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}" COPYONLY) endforeach(file) foreach(file ${PRELOAD_FILES}) target_sources(rrpreload_32 PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}") set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/preload/${file}" PROPERTIES COMPILE_FLAGS "-m32 ${PRELOAD_COMPILE_FLAGS}") endforeach(file) set_target_properties(rrpreload_32 PROPERTIES LINK_FLAGS "-m32 -nostartfiles ${LINKER_FLAGS}") set_target_properties(rrpreload_32 PROPERTIES INSTALL_RPATH "\$ORIGIN") target_link_libraries(rrpreload_32 ${CMAKE_DL_LIBS} ) if(RTLD_AUDIT) add_library(rraudit_32) foreach(file ${AUDIT_SOURCE_FILES}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/audit/${file}" 
"${CMAKE_CURRENT_BINARY_DIR}/32/audit/${file}" COPYONLY) endforeach(file) foreach(file ${AUDIT_FILES}) target_sources(rraudit_32 PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/32/audit/${file}") set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/audit/${file}" PROPERTIES COMPILE_FLAGS "-m32 ${PRELOAD_COMPILE_FLAGS}") endforeach(file) set_target_properties(rraudit_32 PROPERTIES LINK_FLAGS "-m32 -nostartfiles ${LINKER_FLAGS}") target_link_libraries(rraudit_32 ${CMAKE_DL_LIBS} ) set(RR_INSTALL_LIBS_32 ${RR_INSTALL_LIBS_32} rraudit_32) endif() foreach(file exec_stub.c) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}" COPYONLY) set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/${file}" PROPERTIES COMPILE_FLAGS "-m32 -fno-stack-protector") endforeach(file) add_executable(rr_exec_stub_32 32/exec_stub.c) post_build_executable(rr_exec_stub_32) set_target_properties(rr_exec_stub_32 PROPERTIES LINK_FLAGS "-static -nostartfiles -nodefaultlibs -m32 ${LINKER_FLAGS}") install(TARGETS ${RR_INSTALL_LIBS_32} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) endif() ##-------------------------------------------------- ## Testing # A "basic test" consists of a foo.c source file. All basic tests use the # same basic_test.run driver script. The test name is passed as an additional # parameter to the driver script. This script just does # "compare_test EXIT-SUCCESS", i.e. records and replays the program and verifies # that the output of both runs is identical and contains EXIT-SUCCESS. # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. set(BASIC_TESTS 64bit_child _llseek abort accept acct adjtimex aio alarm alarm2 alsa_ioctl arch_prctl async_segv_ignored at_threadexit bad_ip bad_syscall barrier big_buffers big_select block block_open bpf bpf_map brk brk2 capget chew_cpu x86/chew_cpu_cpuid chmod chown clock clock_adjtime clock_nanosleep clock_time64 clone clone_bad_stack clone_bad_tls clone_cleartid_coredump clone_fail clone_immediate_exit clone_newflags clone_parent clone_untraced clone_vfork_pidfd cloned_sigmask close_range constructor copy_file_range x86/cpuid_same_state creat_address_not_truncated x86/cross_arch cwd_inaccessible daemon desched_blocking_poll desched_sigkill detach_huge_mmap detach_state detach_threads detach_sigkill detach_sigkill_exit deterministic_sigsys dev_zero direct dlchecksum dup doublesegv epoll_create epoll_create1 epoll_edge epoll_many epoll_pwait_eintr_sigmask epoll_pwait2 eventfd exec_flags exec_no_env exec_self exec_from_main_thread exec_from_other_thread exec_stopsig execveat exit_with_syscallbuf_signal fadvise fatal_init_signal fatal_sigsegv_thread x86/fault_in_code_page fcntl_dupfd fcntl_misc fcntl_notify fcntl_owner_ex fcntl_rw_hints fcntl_seals fcntl_sig fd_cleanup fd_tracking_across_threads fds_clean flock flock_ofd flock2 fork_brk fork_child_crash fork_many fstatat futex_exit_race futex_exit_race_sigsegv futex_invalid_op futex_pi futex_priorities futex_requeue futex_restart_race futex_restart_clone gcrypt_rdrand getcpu getgroups getpwnam getrandom setitimer getsid gettimeofday grandchild_threads grandchild_threads_main_running grandchild_threads_thread_running grandchild_threads_parent_alive x86/hle x86/hlt inotify int3 intr_futex_wait_restart intr_poll intr_ppoll intr_pselect intr_read_no_restart intr_read_restart intr_sleep 
intr_sleep_no_restart invalid_exec invalid_fcntl invalid_ioctl io io_uring ioctl ioctl_blk ioctl_br ioctl_fb ioctl_fs ioctl_pty ioctl_sg ioctl_tty ioctl_vt ioprio x86/ioperm x86/iopl join_threads joystick kcmp keyctl kill_newborn kill_ptracee large_hole large_write_deadlock legacy_ugid x86/lsl madvise madvise_free madvise_misc madvise_wipeonfork map_fixed map_shared_syscall membarrier memfd_create memfd_create_shared memfd_create_shared_huge mincore mknod mlock mmap_adjacent_to_rr_usage mmap_private mmap_private_grow_under_map mmap_recycle mmap_ro mmap_self_maps_shared mmap_shared mmap_shared_dev_zero mmap_shared_grow mmap_shared_grow_under_map mmap_shared_multiple mmap_shared_subpage mmap_shared_write mmap_shared_write_fork mmap_short_file mmap_write_complex mmap_zero_size_fd x86/modify_ldt mount_ns_exec mount_ns_exec2 mprotect mprotect_heterogenous mprotect_none mprotect_stack mremap mremap_after_coalesce mremap_dontunmap mremap_grow mremap_grow_shared mremap_non_page_size mremap_overwrite mremap_private_grow_under_map mremap_shrink msg msg_trunc msync mtio multiple_pending_signals multiple_pending_signals_sequential munmap_segv munmap_discontinuous nanosleep netfilter netlink_mmap_disable no_mask_timeslice nscd numa x86/old_fork orphan_process packet_mmap_disable x86/patch_syscall_restart pause perf_event perf_event_ioctl perf_event_mmap personality pid_ns_kill_child pid_ns_kill_child_threads pid_ns_kill_child_zombie pid_ns_kill_threads pid_ns_kill_threads_exit_wait pid_ns_reap pid_ns_segv pid_ns_shutdown pidfd pidfd_getfd x86/pkeys poll_sig_race ppoll ppoll_deliver prctl prctl_caps prctl_deathsig prctl_name prctl_short_name prctl_speculation_ctrl x86/prctl_tsc privileged_net_ioctl proc_fds proc_mem protect_rr_fds prw pthread_condvar_locking pthread_mutex_timedlock pthread_pi_mutex pthread_rwlocks x86/ptrace ptrace_attach_null_status ptrace_attach_running ptrace_attach_sleeping ptrace_attach_stopped ptrace_attach_thread_running ptrace_breakpoint ptrace_change_patched_syscall x86/ptrace_debug_regs ptrace_exec x86/ptrace_exec32 ptrace_kill_grandtracee x86/ptrace_tls ptrace_seize ptrace_sigchld_blocked ptrace_signals ptrace_singlestep ptrace_syscall ptrace_syscall_clone_untraced x86/ptrace_sysemu ptrace_sysemu_syscall ptrace_trace_clone ptrace_trace_exit ptrace_traceme ptracer_death ptracer_death_multithread ptracer_death_multithread_peer # pivot_root ... 
disabled because it fails when run as root and does nothing otherwise quotactl x86/rdtsc x86/rdtsc_flags read_nothing readdir read_large read_oversize readlink readlinkat readv record_replay_subject recvfrom redzone_integrity rename rlimit rusage samask save_data_fd sched_attr sched_setaffinity sched_setparam sched_yield sched_yield_to_lower_priority scm_rights scratch_read seccomp seccomp_cloning seccomp_clone_fail seccomp_desched seccomp_kill_exit seccomp_null seccomp_sigsys_args seccomp_sigsys_sigtrap seccomp_sigsys_syscallbuf seccomp_tsync seccomp_veto_exec self_shebang self_sigint sem send_block sendfile set_ptracer set_tid_address setgid setgroups setsid shared_exec shared_monitor shared_offset shared_write shm shm_unmap sigaction_old sigaltstack sigchld_interrupt_signal sigcont sighandler_bad_rsp_sigsegv sighandler_fork sighandler_mask sigill signal_deferred signal_during_preload_init signal_frame signal_unstoppable x86/signal_xmm_state signalfd sigprocmask sigprocmask_ensure_delivery sigprocmask_exec sigprocmask_evil sigprocmask_in_syscallbuf_sighandler sigprocmask_rr_sigs sigprocmask_syscallbuf sigpwr sigqueueinfo x86/sigreturn sigreturn_reg sigreturnmask sigrt sigstop sigstop2 sigsuspend sigtrap simple_threads_stress sioc small_holes sock_name_null sock_names_opts spinlock_priorities splice stack_growth_after_syscallbuf stack_growth_syscallbuf stack_growth_with_guard stack_invalid stack_overflow stack_overflow_altstack stack_overflow_with_guard statfs statx stdout_child stdout_cloexec stdout_dup stdout_redirect switch_read symlink sync sync_file_range syscall_bp syscall_in_writable_mem syscallbuf_signal_reset syscallbuf_signal_blocking syscallbuf_sigstop syscallbuf_timeslice syscallbuf_timeslice2 sysconf sysconf_conf sysctl sysemu_singlestep x86/sysfs sysinfo tgkill thread_yield timer timerfd times truncate_temp tun two_signals_with_mask ulimit_low uname unexpected_exit unexpected_exit_execve unexpected_exit_execve_twice unexpected_exit_pid_ns unjoined_thread unshare userfaultfd utimes vdso_parts vfork_done vfork_flush vfork_setopts vfork_shared video_capture vm_readv_writev vsyscall vsyscall_timeslice x86/x87env wait wait_sigstop write_race writev xattr zero_length_read ) if(MQUEUE_H) set(BASIC_TESTS ${BASIC_TESTS} mq) endif() if(FANOTIFY_H) set(BASIC_TESTS ${BASIC_TESTS} fanotify) endif() if(HAVE_ROBUST_MUTEX) set(BASIC_TESTS ${BASIC_TESTS} robust_futex) endif() set(BASIC_CPP_TESTS std_random unwind_rr_page ) # A "test with program" consists of a foo.c source file and a foo.run driver # script. See src/test/util.sh to learn how the .run files work. # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. 
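# (As a sketch: each foo.run builds on the util.sh helpers, recording and
# replaying its program and checking the results, e.g. via compare_test as
# basic_test.run does; the details vary per test.)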
set(TESTS_WITH_PROGRAM abort_nonmain alternate_thread_diversion args async_kill_with_syscallbuf async_kill_with_syscallbuf2 async_kill_with_threads async_kill_with_threads_main_running async_kill_with_threads_thread_running async_segv async_signal_syscalls async_signal_syscalls2 async_signal_syscalls_siginfo async_usr1 blacklist block_clone_checkpoint block_clone_interrupted block_clone_syscallbuf_overflow block_intr_sigchld blocked_bad_ip blocked_sigill x86/blocked_sigsegv breakpoint breakpoint_conditions breakpoint_overlap call_function call_gettid chaos_oom # Disabled because it's very slow # check_session_leaks checkpoint_dying_threads checkpoint_mixed_mode checksum_sanity check_lost_interrupts clone_file_range clone_interruption # Disabled because it fails # clone_share_vm clone_vfork conditional_breakpoint_calls conditional_breakpoint_offload condvar_stress cont_race x86/cpuid_singlestep crash crash_in_function daemon_read dconf_mock dev_tty x86/diversion_rdtsc diversion_sigtrap diversion_syscall dlopen early_error elapsed_time exclusion_region exec_failed exec_many execve_loop exit_codes exit_group exit_race exit_status x86/explicit_checkpoints fd_limit fork_stress fork_syscalls function_calls x86/fxregs getcwd gdb_bogus_breakpoint gdb_qpasssignals goto_event hello hooks # Disabled because issue #1806 makes tests fail on Debian 8.5 at least # history ignored_async_usr1 ignored_sigsegv ignore_nested immediate_restart x86/int3_ok interrupt intr_ptrace_decline invalid_interpreter invalid_jump jit_proc_mem link madvise_dontfork madvise_fracture_flags main_thread_exit many_yields mmap_fd_reuse_checkpoint mmap_replace_most_mappings mmap_shared_prot mmap_shared_write_exec_race mmap_tmpfs mmap_write mmap_write_private x86/morestack_unwind mprotect_growsdown mprotect_syscallbuf_overflow mutex_pi_stress nested_detach_wait nested_detach_kill_stuck overflow_branch_counter pack patch_page_end x86/patch_40_80_f6_81 priority ptrace_remote_unmap x86/rdtsc_loop x86/rdtsc_loop2 x86/rdtsc_interfering read_big_struct remove_latest_trace restart_abnormal_exit reverse_continue_breakpoint reverse_continue_multiprocess reverse_continue_process_signal reverse_many_breakpoints reverse_step_long reverse_step_threads reverse_step_threads_break # Not called ps, because that interferes with using real 'ps' in tests rr_ps rr_ps_ns rseq rseq_syscallbuf search seccomp_blocks_rr seccomp_open seccomp_signals segfault setuid shared_map shared_persistent_file signal_numbers sigprocmask_race sigprocmask_rr_sigs_nondefault simple x86/singlestep_pushf stack_growth step_thread strict_priorities x86/string_instructions x86/string_instructions_async_signals x86/string_instructions_async_signals_shared x86/string_instructions_multiwatch x86/string_instructions_replay x86/string_instructions_singlestep_fastforward x86/string_instructions_watch x86/syscallbuf_branch_check syscallbuf_fd_disabling x86/syscallbuf_rdtsc_page syscallbuf_signal_blocking_read sysconf_onln target_fork target_process tcp_sockets term_nonmain term_rr term_rr_ok term_trace_reset term_trace_syscall thread_exit_signal thread_open_race thread_stress threaded_syscall_spam threads tls ttyname unexpected_stack_growth unicode user_ignore_sig vdso_clock_gettime_stack vdso_gettimeofday_stack vdso_time_stack vfork vfork_read_clone_stress vsyscall_reverse_next wait_for_all watchpoint watchpoint_at_sched watchpoint_before_signal watchpoint_no_progress watchpoint_size_change watchpoint_syscall watchpoint_unaligned ) # A "test without program" is a foo.run driver 
script only, which does # something with one of the test executables above (or has special rules # to build its own executable). # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. set(TESTS_WITHOUT_PROGRAM async_signal_syscalls_100 async_signal_syscalls_1000 bad_breakpoint break_block break_clock break_clone break_exec break_int3 break_mmap_private break_msg x86/break_rdtsc break_sigreturn break_sync_signal break_thread break_time_slice breakpoint_consistent breakpoint_print call_exit check_patched_pthread checkpoint_async_signal_syscalls_1000 checkpoint_mmap_shared checkpoint_prctl_name checkpoint_simple checksum_block_open checksum_sanity_noclone comm cont_signal copy_all x86/cpuid dead_thread_target desched_ticks deliver_async_signal_during_syscalls env_newline exec_deleted exec_stop execp explicit_checkpoint_clone file_name_newline final_sigkill first_instruction fork_exec_info_thr get_thread_list hardlink_mmapped_files hbreak large_file mprotect_step nested_detach nested_detach_kill nested_detach_stop nested_release parent_no_break_child_bkpt parent_no_stop_child_crash post_exec_fpu_regs proc_maps read_bad_mem record_replay remove_watchpoint replay_overlarge_event_number replay_serve_files restart_invalid_checkpoint restart_unstable restart_diversion reverse_alarm reverse_continue_exec_subprocess reverse_continue_fork_subprocess reverse_continue_int3 reverse_continue_start reverse_finish reverse_step_breakpoint reverse_step_signal reverse_step_threads2 reverse_watchpoint reverse_watchpoint_syscall run_end run_in_function sanity seekticks shm_checkpoint siginfo x86/sigreturn_checksum signal_stop signal_checkpoint simple_script simple_script_debug simple_winch stack_overflow_debug step1 x86/step_rdtsc step_signal x86/string_instructions_break x86/string_instructions_replay_quirk subprocess_exit_ends_session switch_processes syscallbuf_timeslice_250 tick0 tick0_less trace_version term_trace_cpu trace_events tty unmap_vdso unwind_on_signal vfork_done_clone vfork_exec vfork_break_parent vsyscall_singlestep watch_code watchpoint_cond watchpoint_unaligned2 when ) if(BUILD_TESTS) # Part of the installable testsuite (test files). if(INSTALL_TESTSUITE) install(DIRECTORY ${CMAKE_SOURCE_DIR}/src/test/ DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/rr/src/test USE_SOURCE_PERMISSIONS) endif(INSTALL_TESTSUITE) # We use symlinks in the tests to access the build and source directories. # This is needed because we cannot change the paths used by the tests when # the testsuite is installed. We work around this by using symlinks during # the normal build, and then installing symlinks with the testsuite that # have the same name but, the new link targets. execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_SOURCE_DIR} source_dir) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_BINARY_DIR} bin_dir) if(INSTALL_TESTSUITE) # Create the directory for the symlinks first and then create symlinks. 
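# The installed layout this produces is roughly:
#   <prefix>/lib/rr/testsuite/obj/source_dir -> <prefix>/lib/rr/testsuite/rr
#   <prefix>/lib/rr/testsuite/obj/bin_dir    -> <prefix>/lib/rr/testsuite/obj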
install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink \${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/rr/testsuite/rr \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/source_dir) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink \${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin_dir)") endif(INSTALL_TESTSUITE) add_test(check_environment bash source_dir/src/test/check_environment_test.run) set_tests_properties(check_environment PROPERTIES FAIL_REGULAR_EXPRESSION "rr needs /proc/sys/kernel/perf_event_paranoid <= 1") foreach(test ${BASIC_TESTS} ${TESTS_WITH_PROGRAM}) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() get_filename_component(testname ${test} NAME) add_executable(${testname} src/test/${test}.c) target_include_directories(${testname} PRIVATE src/preload) post_build_executable(${testname}) set_source_files_properties(src/test/${test}.c PROPERTIES COMPILE_FLAGS ${RR_TEST_FLAGS}) add_dependencies(${testname} Generated) if(LIBRT) target_link_libraries(${testname} ${LIBRT}) endif() target_link_libraries(${testname} -ldl) # Part of the installable testsuite (test programs). if(INSTALL_TESTSUITE) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/${testname} DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) endif(INSTALL_TESTSUITE) endforeach(test) add_executable(tick0 src/test/tick0.c) post_build_executable(tick0) set_target_properties(tick0 PROPERTIES LINK_FLAGS "-static -nostdlib -nodefaultlibs" COMPILE_FLAGS "-static -nostdlib -nodefaultlibs -O3 -g2 -DHAS_TICK0=1") add_executable(tick0_less src/test/tick0.c) post_build_executable(tick0_less) set_target_properties(tick0_less PROPERTIES LINK_FLAGS "-static -nostdlib -nodefaultlibs" COMPILE_FLAGS "-static -nostdlib -nodefaultlibs -O3 -g2") add_executable(watchpoint_unaligned2 src/test/watchpoint_unaligned2.c) post_build_executable(watchpoint_unaligned2) set_target_properties(watchpoint_unaligned2 PROPERTIES COMPILE_FLAGS "${RR_TEST_FLAGS} -g -O3") add_dependencies(watchpoint_unaligned2 Generated) # Test disabled because it requires libuvc to be built and installed, and a # working USB camera # add_executable(usb src/test/usb.c) # post_build_executable(usb) # add_dependencies(usb Generated) # target_link_libraries(usb ${LIBRT} -L/usr/local/lib -luvc -lusb-1.0) foreach(test ${BASIC_CPP_TESTS}) add_executable(${test} src/test/${test}.cc) post_build_executable(${test}) set_source_files_properties(src/test/${test}.cc PROPERTIES COMPILE_FLAGS ${RR_TEST_FLAGS}) add_dependencies(${test} Generated) if(LIBRT) target_link_libraries(${test} ${LIBRT}) endif() # Part of the installable testsuite (test programs). 
if(INSTALL_TESTSUITE) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/${test} DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) endif(INSTALL_TESTSUITE) endforeach(test) add_library(test_lib src/test/test_lib.c ) add_dependencies(test_lib Generated) set_source_files_properties(src/test/test_lib.c PROPERTIES COMPILE_FLAGS ${RR_TEST_FLAGS}) if(LIBRT) target_link_libraries(constructor ${LIBRT}) endif() target_link_libraries(constructor test_lib) # cpuid test needs to link with cpuid_loop.S if (x86ish) add_executable(cpuid src/test/x86/cpuid.c src/test/x86/cpuid_loop.S) post_build_executable(cpuid) set_source_files_properties(src/test/x86/cpuid.c PROPERTIES COMPILE_FLAGS ${RR_TEST_FLAGS}) add_dependencies(cpuid Generated) if(LIBRT) target_link_libraries(cpuid ${LIBRT}) endif() endif() # Check if we're running on KNL. If so, we allot more time to tests, due to # reduced single-core performance. exec_program(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) string(REGEX MATCH "^.*(Xeon Phi).*$" CPU_MODEL_PHI ${CPUINFO}) if(NOT "${CPU_MODEL_PHI}" STREQUAL "") set(TEST_MONITOR_DEFAULT_TIMEOUT 480) else() set(TEST_MONITOR_DEFAULT_TIMEOUT 120) endif() # The real timeouts are handled by test-monitor set(CTEST_TEST_TIMEOUT 1000) function(configure_test test) set_tests_properties(${test} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") endfunction(configure_test) if(INSTALL_TESTSUITE) install(TARGETS test_lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/test-monitor DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) if (x86ish) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/cpuid DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) endif(x86ish) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj) endif(INSTALL_TESTSUITE) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${OTHER_TESTS}) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() get_filename_component(testname ${test} NAME) add_test(${test} bash source_dir/src/test/basic_test.run ${testname} "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}) add_test(${test}-no-syscallbuf bash source_dir/src/test/basic_test.run ${testname} -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-no-syscallbuf) endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) if (NOT x86ish AND ${test} MATCHES "^x86/.*") continue() endif() get_filename_component(testname ${test} NAME) add_test(${test} bash source_dir/src/test/${test}.run ${testname} "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}) add_test(${test}-no-syscallbuf bash source_dir/src/test/${test}.run ${testname} -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-no-syscallbuf) endforeach(test) # Run 32-bit tests on 64-bit builds. # We copy the test files into '32' subdirectories in the output # directory, so we can set different compile options on them. # This sucks but I can't find a better way to get CMake to build # the same source file in two different ways. 
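# (For example, src/test/alarm.c is compiled once as 'alarm' with the default
# test flags and again as 'alarm_32' with -m32, from the copy staged under
# ${CMAKE_CURRENT_BINARY_DIR}/32/.)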
if(rr_32BIT AND rr_64BIT) foreach(header util.h nsutils.h ptrace_util.h util_internal.h) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${header}" "${CMAKE_CURRENT_BINARY_DIR}/32/${header}" COPYONLY) endforeach(header) foreach(test ${BASIC_TESTS} ${TESTS_WITH_PROGRAM} x86/cpuid test_lib tick0 watchpoint_unaligned2) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.c" "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c" COPYONLY) set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c" PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}") endforeach(test) foreach(test ${BASIC_CPP_TESTS}) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.cc" "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc" COPYONLY) set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc" PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}") endforeach(test) foreach(file x86/cpuid_loop.S x86/util.h) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/test/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}" COPYONLY) set_source_files_properties("${CMAKE_CURRENT_BINARY_DIR}/32/${file}" PROPERTIES COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS}") endforeach(file) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${TESTS_WITH_PROGRAM}) get_filename_component(testname ${test} NAME) if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c") add_executable(${testname}_32 "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c") else() add_executable(${testname}_32 "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.cc") endif() target_include_directories(${testname}_32 PRIVATE src/preload) post_build_executable(${testname}_32) add_dependencies(${testname}_32 Generated) set_target_properties(${testname}_32 PROPERTIES LINK_FLAGS "-m32 ${RR_TEST_FLAGS} ${LINKER_FLAGS}") if(LIBRT_32) target_link_libraries(${testname}_32 ${LIBRT_32}) endif() target_link_libraries(${testname}_32 -ldl) # Part of the installable testsuite (test programs). 
if (INSTALL_TESTSUITE) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/${testname}_32 DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) endif (INSTALL_TESTSUITE) endforeach(test) add_executable(tick0_32 "${CMAKE_CURRENT_BINARY_DIR}/32/tick0.c") post_build_executable(tick0_32) set_target_properties(tick0_32 PROPERTIES LINK_FLAGS "-m32 -static -nostdlib -nodefaultlibs" COMPILE_FLAGS "-m32 -static -nostdlib -nodefaultlibs -O3 -g2 -DHAS_TICK0=1") add_executable(tick0_less_32 "${CMAKE_CURRENT_BINARY_DIR}/32/tick0.c") post_build_executable(tick0_less_32) set_target_properties(tick0_less_32 PROPERTIES LINK_FLAGS "-m32 -static -nostdlib -nodefaultlibs" COMPILE_FLAGS "-m32 -static -nostdlib -nodefaultlibs -O3 -g2") add_executable(watchpoint_unaligned2_32 "${CMAKE_CURRENT_BINARY_DIR}/32/watchpoint_unaligned2.c") post_build_executable(watchpoint_unaligned2_32) set_target_properties(watchpoint_unaligned2_32 PROPERTIES LINK_FLAGS "-m32" COMPILE_FLAGS "-m32 ${RR_TEST_FLAGS} -g -O3") add_dependencies(watchpoint_unaligned2_32 Generated) add_library(test_lib_32 "${CMAKE_CURRENT_BINARY_DIR}/32/test_lib.c" ) add_dependencies(test_lib_32 Generated) set_target_properties(test_lib_32 PROPERTIES LINK_FLAGS "-m32 ${LINKER_FLAGS}") if(LIBRT_32) target_link_libraries(constructor_32 ${LIBRT_32}) endif() target_link_libraries(constructor_32 test_lib_32) # cpuid test needs to link with cpuid_loop.S add_executable(cpuid_32 32/x86/cpuid.c 32/x86/cpuid_loop.S) post_build_executable(cpuid_32) add_dependencies(cpuid_32 Generated) set_target_properties(cpuid_32 PROPERTIES LINK_FLAGS "-m32 ${LINKER_FLAGS}") if(LIBRT_32) target_link_libraries(cpuid_32 ${LIBRT_32}) endif() if(INSTALL_TESTSUITE) install(TARGETS test_lib_32 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/bin/cpuid_32 DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr/testsuite/obj/bin) endif(INSTALL_TESTSUITE) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${OTHER_TESTS}) get_filename_component(testname ${test} NAME) add_test(${test}-32 bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf bash source_dir/src/test/basic_test.run ${testname}_32 -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) get_filename_component(testname ${test} NAME) add_test(${test}-32 bash source_dir/src/test/${test}.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf bash source_dir/src/test/${test}.run ${testname}_32 -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) endif() set(CHAOS_TESTS core_count futex_wakeup getaffinity_core_count pipe_wakeup mmap_adjacent mmap_bits starvation_multithreaded starvation_singlethreaded ) foreach(test ${CHAOS_TESTS}) add_executable(${test} src/chaos-test/${test}.c) post_build_executable(${test}) if(LIBRT) target_link_libraries(${test} ${LIBRT}) endif() endforeach(test) add_executable(test-monitor src/test-monitor/test-monitor.cc) add_executable(ftrace_helper src/ftrace/ftrace_helper.c) add_executable(counters src/counters-test/counters.cc) set_source_files_properties(src/counters-test/counters.cc PROPERTIES COMPILE_FLAGS "-fno-stack-protector") endif() include(ProcessorCount) ProcessorCount(N) if(NOT N EQUAL 0) set(JFLAG -j${N}) endif() add_custom_target(check COMMAND 
${CMAKE_CTEST_COMMAND} --verbose ${JFLAG}) # Run only syscallbuf-enabled and native-bitness tests add_custom_target(fastcheck COMMAND ${CMAKE_CTEST_COMMAND} --verbose --exclude-regex '[-]' ${JFLAG}) ##-------------------------------------------------- ## Package configuration include (InstallRequiredSystemLibraries) set(CPACK_PACKAGE_NAME "rr") set(CPACK_PACKAGE_VERSION_MAJOR "${rr_VERSION_MAJOR}") set(CPACK_PACKAGE_VERSION_MINOR "${rr_VERSION_MINOR}") set(CPACK_PACKAGE_VERSION_PATCH "${rr_VERSION_PATCH}") set(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_OUTPUT_FILE_PREFIX dist) set(CPACK_GENERATOR "TGZ;RPM;DEB" CACHE STRING "CPack generators") set(CPACK_SOURCE_GENERATOR "TGZ") set(CPACK_BINARY_DIR "${PROJECT_BINARY_DIR}") # Don't strip binaries. It's important/useful for librrpreload at least to # have debug symbols. For package releases, pass -Dstrip=TRUE to strip symbols # from the rr binary at build time. set(CPACK_STRIP_FILES FALSE) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lightweight tool for recording and replaying execution of applications (trees of processes and threads)") set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/README.md") set(CPACK_PACKAGE_VENDOR "rr-debugger") set(CPACK_DEBIAN_PACKAGE_MAINTAINER "rr-debugger") set(CPACK_DEBIAN_PACKAGE_SECTION "devel") if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "i.86") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "i386") elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm.*") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm") endif() # XXX Cmake 2.8.7 doesn't know how to avoid specifying /usr, # /usr/bin, etc, as files to be installed, but distros are finicky # about their specification. We want to manually filter those paths # out of our install list but 2.8.7 also isn't capable of that. set(CPACK_RPM_USER_BINARY_SPECFILE "${CMAKE_SOURCE_DIR}/rr.spec") set(CPACK_RPM_PACKAGE_RELEASE 1) set(CPACK_RPM_PACKAGE_GROUP "Development/Debuggers") set(CPACK_RPM_PACKAGE_LICENSE "MIT and BSD") # Prevent binaries from being stripped set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") include (CPack) rr-5.7.0/CODE_OF_CONDUCT.md000066400000000000000000000012631450675474200147650ustar00rootroot00000000000000# Community Participation Guidelines This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). ## How to Report For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. rr-5.7.0/CONTRIBUTING.md000066400000000000000000000072611450675474200144230ustar00rootroot00000000000000## Submission Checklist Please make sure you go through this list before submitting a patch. The rules aren't hard and fast, but mostly adhering to them will make for quicker mergings. - [ ] Does your PR add support for a new kernel API? For example, supporting a new syscall. If so, your patch should include at least one new test for the API. This is usually pretty easy. See `$rr/src/test` for examples. - [ ] Did you run the rr test suite (including your new tests, if any), and pass all the tests? `make -C $objdir check`. 
Unfortunately, rr doesn't have automated infrastructure that can run the tests yet, so developers have to run them locally. - [ ] If you created new files for your PR, did you `git add` them? Checking `git status` habitually (or with a script or push hook) is a good habit to acquire. - [ ] If you changed the trace layout or format, did you bump `TRACE_VERSION`? - [ ] If you added new command-line parameters, did you update `print_usage()` to document them? - [ ] Does your PR apply cleanly on top of upstream/master HEAD? It's dangerous to have someone else sort out your merge conflicts, so just don't do it. Best of all is to have a PR *rebased* on top of upstream/master HEAD, so that the merge is simply a fast-forward. - [ ] If your PR includes multiple changesets, do they all (i) build cleanly in sequence; (ii) pass all tests in sequence? This is important for bisecting over commit history. - [ ] If your PR is a very large-scale change (for example, a rewrite in Rust to use the visitor pattern), did you discuss the proposed changes in an issue or on the mailing list? It's hard to review large patches that just fall in one's lap. It's much easier to discuss the important changes at a high level and then approach the patch knowing what's important and what's not. - [ ] If your PR is large or includes many changesets, would it have been possible to break the changes into a series of smaller PRs? For example, it's hard to review a big patch that, say, fixes whitespace errors in a file along with a one-line, important bug fix. It's much easier to review one PR that fixes whitespace (which can just be skimmed), and then review another PR that makes the one-line bug fix (which would be scrutinized more). This approach is also better for the patch author in that it usually allows the work to land faster, and reduces the burden of continually un-bit-rotting large, trivial changes. - [ ] Did you check your code is formatted correctly? It's easiest to run `scripts/reformat.sh` on each commit. ## Coding Guidelines rr uses assertions heavily, for code documentation, for automated checking that the code matches the documentation, and to improve the power of automated tests. Assertions are turned on in release builds. Whenever you depend on an invariant that is not immediately obvious, consider adding assertions to check it. rr ships with debugging enabled and compiler optimizations disabled for the rr process itself. That's because rr performance almost always depends on algorithmic issues --- minimizing the number of system calls, and especially, minimizing the number of context switches between the tracees and the rr process --- much more than the performance of the code running in the rr process. For the same reason, rr-process code should be as simple as possible even if that's less efficient. To some extent, once we're running code in the rr process, we've already lost performance-wise. OTOH we do enable optimizations in `preload.c` because that runs in tracees. ## Coding Style Put braces around all statement blocks, even one-line `if` bodies etc. All C++ declarations are in the `rr` namespace. All C++ types are in CamelCase; all C types are underscore_names. rr-5.7.0/LICENSE000066400000000000000000000051071450675474200131740ustar00rootroot00000000000000Copyright (c) 2013 Mozilla Foundation Copyright 2015 VMware, Inc Copyright 2015 Google Inc.
Contributors: Albert Noll , Thomas Anderegg , Nimrod Partush Andrew Walton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ rr-5.7.0/README.md000066400000000000000000000036031450675474200134450ustar00rootroot00000000000000# Overview [![Build status](https://badge.buildkite.com/43782b9c8f7c98ed8a9ba1e82c3baeba59783b675fc4d4c9e4.svg?branch=master)](https://buildkite.com/julialang/rr) rr is a lightweight tool for recording, replaying and debugging execution of applications (trees of processes and threads). Debugging extends gdb with very efficient reverse-execution, which in combination with standard gdb/x86 features like hardware data watchpoints, makes debugging much more fun. More information about the project, including instructions on how to install, run, and build rr, is at [https://rr-project.org](https://rr-project.org). The best technical overview is currently the paper [Engineering Record And Replay For Deployability: Extended Technical Report](https://arxiv.org/pdf/1705.05937.pdf). Or go directly to the [installation and building instructions](https://github.com/rr-debugger/rr/wiki/Building-And-Installing). Please contribute! Make sure to review the [pull request checklist](/CONTRIBUTING.md) before submitting a pull request. 
If you find rr useful, please [add a testimonial](https://github.com/rr-debugger/rr/wiki/Testimonials). rr development is sponsored by [Pernosco](https://pernos.co) and was originated by [Mozilla](https://www.mozilla.org). # System requirements * Linux kernel >= 4.7 (for support of `__WALL` in `waitid()`) * rr 5.6.0 worked with kernel 3.11 (requiring `PTRACE_SETSIGMASK`) * rr currently requires either: * An Intel CPU with [Nehalem](https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29) (2010) or later microarchitecture. * Certain AMD Zen or later processors (see https://github.com/rr-debugger/rr/wiki/Zen) * Certain AArch64 microarchitectures (e.g. ARM Neoverse N1 or the Apple Silicon M-series) * Running in a VM guest is supported, as long as the VM supports virtualization of hardware performance counters. (VMware and KVM are known to work; Xen does not.) rr-5.7.0/Vagrantfile000066400000000000000000000023361450675474200143550ustar00rootroot00000000000000# -*- mode: ruby -*- # vi: set ft=ruby : # Author: David Manouchehri Vagrant.configure("2") do |config| config.vm.box = "bento/ubuntu-16.04" config.vm.synced_folder ".", "/vagrant", disabled: true config.vm.provision "shell", inline: <<-SHELL apt-get update # DEBIAN_FRONTEND=noninteractive apt-get -y upgrade DEBIAN_FRONTEND=noninteractive apt-get -y install ccache cmake make g++-multilib gdb pkg-config realpath python-pexpect manpages-dev git ninja-build capnproto libcapnp-dev zlib1g-dev apt-get clean SHELL config.vm.provision "shell", privileged: false, inline: <<-SHELL git clone https://github.com/rr-debugger/rr.git cd rr mkdir obj cd obj cmake .. -DPYTHON_EXECUTABLE=/usr/bin/python make -j8 make test SHELL config.vm.provision "shell", inline: <<-SHELL cd /home/vagrant/rr/obj/ make install SHELL %w(vmware_fusion vmware_workstation vmware_appcatalyst).each do |provider| config.vm.provider provider do |v| v.vmx["memsize"] = "4096" v.vmx['vpmc.enable'] = 'true' v.vmx['vhv.enable'] = 'true' v.vmx['vvtd.enable'] = 'true' v.vmx['monitor_control.disable_hvsim_clusters'] = 'true' v.vmx['virtualHW.version'] = '14' v.vmx['ethernet0.virtualDev'] = 'vmxnet3' end end end rr-5.7.0/configure000077500000000000000000000001301450675474200140650ustar00rootroot00000000000000#!/usr/bin/env bash # Helper to make |./configure && make| do what you expect. cmake . rr-5.7.0/include/000077500000000000000000000000001450675474200136075ustar00rootroot00000000000000rr-5.7.0/include/rr/000077500000000000000000000000001450675474200142325ustar00rootroot00000000000000rr-5.7.0/include/rr/rr.h000066400000000000000000000024731450675474200150340ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_H_ #define RR_H_ /** * rr tracees can write data to this special fd that they want * verified across record/replay. When it's written in recording, rr * saves the data. During replay, the data are checked against the * recorded data. * * Tracees using this interface should take care that the buffers * storing the data are either not racy, or are synchronized by the * tracee. * * To simplify things, we make this a valid fd opened to /dev/null during * recording. * * Tracees may close this fd, or dup() something over it, etc. If that happens, * it will lose its magical properties. */ #define RR_MAGIC_SAVE_DATA_FD 999 /** * Tracees use this fd to send other fds to rr. * This is only set up during recording. * Only the outermost rr uses this. Inner rr replays will use a different fd. 
*/ #define RR_RESERVED_SOCKET_FD 1001 /** * The preferred fd that rr uses to control tracee desched. Some software * (e.g. the chromium IPC code) wants to have the first few fds all to itself, * so we need to stay above some floor. Tracee close()es of the fd that is * actually assigned will be silently ignored, and tracee dup()s to that fd will * fail with EBADF. */ #define RR_DESCHED_EVENT_FLOOR_FD 100 #endif /* RR_H_ */ rr-5.7.0/release-process/000077500000000000000000000000001450675474200152605ustar00rootroot00000000000000rr-5.7.0/release-process/README.md000066400000000000000000000020501450675474200165340ustar00rootroot00000000000000# Setting up AWS-based rr release testing * Create an AWS account. * Switch to the `us-east-2` (Ohio) region. The AMI IDs under `distro-configs` are all for the `us-east-2` region so this region must be used. * Use the EC2 console to create a keypair named `rr-testing`. This will download a file called `rr-testing.pem` containing the private key; move it somewhere safe and `chmod go-r ...` to make ssh happy. * Install `boto3` locally, e.g. using `pip install boto3`. * Install `aws-cli` locally, e.g. using [these instructions](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). * Set `AWS_DEFAULT_REGION=us-east-2`, `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in your environment. * Create AWS resources using `aws cloudformation create-stack --stack-name rr-testing --template-body file://path/to/rr-testing-cloud-formation.json`. * In the future, use `aws cloudformation update-stack --stack-name rr-testing --template-body file://path/to/rr-testing-cloud-formation.json` to update when that configuration file changes. rr-5.7.0/release-process/distro-configs/000077500000000000000000000000001450675474200202125ustar00rootroot00000000000000rr-5.7.0/release-process/distro-configs/centos8.json000066400000000000000000000015401450675474200224700ustar00rootroot00000000000000{ "name": "CentOS Stream 8", "ami_owner": "125523088429", "ami_name_pattern": "CentOS Stream 8 *", "user": "centos", "staticlibs": false, "setup_commands": [ "cloud-init status --wait", "sudo dnf config-manager --set-enabled powertools", "sudo dnf install -y epel-release epel-next-release", "sudo dnf update -y" ], "install_build_deps": "sudo dnf install -y rpm ccache cmake gcc gcc-c++ gdb glibc-devel libstdc++-devel zlib-devel git python3 python3-pexpect rpm-build ninja-build capnproto capnproto-libs capnproto-devel", "install_build_deps_x86_64": "sudo dnf install -y glibc-devel.i686 libstdc++-devel.i686", "install_app_test_deps": "sudo dnf install -y gtk3 dbus-glib xorg-x11-utils gnutls-devel libacl-devel openldap-devel tigervnc-server-minimal curl tar bzip2 libreoffice-writer", "exclude_tests": ["x86/pkeys.*"] } rr-5.7.0/release-process/distro-configs/centos9.json000066400000000000000000000015271450675474200224760ustar00rootroot00000000000000{ "name": "CentOS Stream 9", "ami_owner": "125523088429", "ami_name_pattern": "CentOS Stream 9 *", "user": "ec2-user", "archs": ["x86_64", "arm64"], "staticlibs": false, "setup_commands": [ "cloud-init status --wait", "sudo dnf config-manager --set-enabled crb", "sudo dnf install -y epel-release epel-next-release", "sudo dnf update -y" ], "install_build_deps": "sudo dnf install -y rpm ccache cmake gcc gcc-c++ gdb glibc-devel libstdc++-devel zlib-devel git python3 python3-pexpect rpm-build ninja-build capnproto capnproto-libs capnproto-devel", "install_build_deps_x86_64": "sudo dnf install -y glibc-devel.i686 libstdc++-devel.i686", 
"install_app_test_deps": "sudo dnf install -y gtk3 dbus-glib xorg-x11-utils gnutls-devel libacl-devel openldap-devel tigervnc-server-minimal curl tar bzip2 libreoffice-writer" } rr-5.7.0/release-process/distro-configs/debian10.json000066400000000000000000000013651450675474200224750ustar00rootroot00000000000000{ "name": "Debian 10", "ami_owner": "136693071363", "ami_name_pattern": "debian-10-*", "user": "admin", "setup_commands": [ "cloud-init status --wait", "export DEBIAN_FRONTEND=noninteractive UCF_FORCE_CONFNEW=1", "sudo -E apt-get update -y", "sudo -E apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo -E apt-get install -y locales-all rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo -E apt-get install -y g++-multilib", "install_app_test_deps": "sudo -E apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice", "exclude_tests": ["x86/pkeys.*"] } rr-5.7.0/release-process/distro-configs/debian11.json000066400000000000000000000013211450675474200224660ustar00rootroot00000000000000{ "name": "Debian 11", "ami_owner": "136693071363", "ami_name_pattern": "debian-11-*", "user": "admin", "setup_commands": [ "cloud-init status --wait", "export DEBIAN_FRONTEND=noninteractive UCF_FORCE_CONFNEW=1", "sudo -E apt-get update -y", "sudo -E apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo -E apt-get install -y locales-all rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo -E apt-get install -y g++-multilib", "install_app_test_deps": "sudo -E apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice" } rr-5.7.0/release-process/distro-configs/debian12.json000066400000000000000000000013741450675474200224770ustar00rootroot00000000000000{ "name": "Debian 12", "ami_owner": "136693071363", "ami_name_pattern": "debian-12-*", "user": "admin", "archs": ["x86_64", "arm64"], "setup_commands": [ "cloud-init status --wait", "export DEBIAN_FRONTEND=noninteractive UCF_FORCE_CONFNEW=1", "sudo -E apt-get update -y", "sudo -E apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo -E apt-get install -y locales-all rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev-is-python3 libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo -E apt-get install -y g++-multilib", "install_app_test_deps": "sudo -E apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice" } rr-5.7.0/release-process/distro-configs/ubuntu18-lts.json000066400000000000000000000013011450675474200233730ustar00rootroot00000000000000{ "name": "Ubuntu 18.04 LTS", "ami_owner": "099720109477", "ami_name_pattern": "ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-*", "user": "ubuntu", "setup_commands": [ "cloud-init status --wait", "sudo apt-get update -y", "sudo apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo apt-get install -y rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo apt-get install -y g++-multilib", "install_app_test_deps": "sudo apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice", "exclude_tests": ["x86/pkeys.*"] } 
rr-5.7.0/release-process/distro-configs/ubuntu20-lts.json000066400000000000000000000013001450675474200233630ustar00rootroot00000000000000{ "name": "Ubuntu 20.04 LTS", "ami_owner": "099720109477", "ami_name_pattern": "ubuntu/images/hvm-ssd/ubuntu-focal-20.04-*", "user": "ubuntu", "setup_commands": [ "cloud-init status --wait", "sudo apt-get update -y", "sudo apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo apt-get install -y rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo apt-get install -y g++-multilib", "install_app_test_deps": "sudo apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice", "exclude_tests": ["x86/pkeys.*"] } rr-5.7.0/release-process/distro-configs/ubuntu22-lts.json000066400000000000000000000013071450675474200233740ustar00rootroot00000000000000{ "name": "Ubuntu 22.04 LTS", "ami_owner": "099720109477", "ami_name_pattern": "ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-*", "user": "ubuntu", "archs": ["x86_64", "arm64"], "setup_commands": [ "cloud-init status --wait", "sudo apt-get update -y", "sudo apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo apt-get install -y rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev-is-python3 libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo apt-get install -y g++-multilib", "install_app_test_deps": "sudo apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice" } rr-5.7.0/release-process/distro-configs/ubuntu23.04.json000066400000000000000000000013521450675474200230170ustar00rootroot00000000000000{ "name": "Ubuntu 23.04", "ami_owner": "099720109477", "ami_name_pattern": "ubuntu/images/hvm-ssd/ubuntu-lunar-23.04-*", "user": "ubuntu", "archs": ["x86_64", "arm64"], "setup_commands": [ "cloud-init status --wait", "sudo apt-get update -y", "sudo apt-get dist-upgrade -f -y" ], "install_build_deps": "sudo apt-get install -y rpm ccache cmake g++ pkg-config zlib1g-dev git python-dev-is-python3 libacl1-dev ninja-build manpages-dev capnproto libcapnp-dev gdb python3-pexpect", "install_build_deps_x86_64": "sudo apt-get install -y g++-multilib", "install_app_test_deps": "sudo apt-get install -y tightvncserver xtightvncviewer curl tar bzip2 libdbus-glib-1-2 libreoffice", "exclude_tests_arm64": ["setuid.*"] } rr-5.7.0/release-process/prepare-release.py000077500000000000000000000125131450675474200207130ustar00rootroot00000000000000#!/usr/bin/python3 import argparse import glob import json import os import re import subprocess import sys import time # These are where we build the release binaries. They should be as old as possible # while still supported. Update these when the distro release is no longer supported. 
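# Each key below is a (distro-config basename, architecture) pair; the value
# is the CPack generator list that run_tests() passes to test-system.py via
# --cpack-generators, so e.g. the ubuntu18-lts/x86_64 VM produces the release
# TGZ and DEB packages while the centos8/x86_64 VM produces the RPM.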
dist_packaging = { ('ubuntu18-lts', 'x86_64'): 'TGZ;DEB', ('ubuntu22-lts', 'arm64'): 'TGZ;DEB', ('centos8', 'x86_64'): 'RPM', ('centos9', 'arm64'): 'RPM', } os.chdir(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) parser = argparse.ArgumentParser() parser.add_argument('version') parser.add_argument('keypair_pem_file') args = parser.parse_args() version_re = re.compile('(\d+)\.(\d+)\.(\d+)') m = version_re.match(args.version) if not m: raise ValueError('version must have three numeric components, got %s' % args.version) major = int(m.group(1)) minor = int(m.group(2)) patch = int(m.group(3)) version = '%d.%d.%d' % (major, minor, patch) dist_dir = '/tmp/rr-dist' log_dir = '/tmp/rr-release-logs' def check_call(args): print('Running %s' % args) subprocess.check_call(args) def update_cmake(name, num): check_call(['sed', '-i', 's/rr_VERSION_%s [0-9][0-9]*/rr_VERSION_%s %d/g' % (name, name, num), 'CMakeLists.txt']) def prepare_branch(): output = subprocess.check_output(['git', 'status', '--untracked-files=no', '--porcelain'], stderr=subprocess.STDOUT) if output: print('Uncommitted changes in git workspace, aborting', file=sys.stderr) sys.exit(2) check_call(['git', 'checkout', '-B', 'release', 'master']) update_cmake('MAJOR', major) update_cmake('MINOR', minor) update_cmake('PATCH', patch) check_call(['git', 'commit', '-a', '-m', 'Bump version to %s' % version]) check_call(['git', 'tag', '-f', version]) check_call(['git', 'push', '-f', '--set-upstream', 'origin', 'release']) check_call(['git', 'checkout', 'master']) def prepare_dirs(): check_call(['rm', '-rf', dist_dir, log_dir]) check_call(['mkdir', dist_dir, log_dir]) def output_file_name(distro_name, arch): return os.path.join(log_dir, '%s.%s' % (distro_name, arch)) def has_line_starting(output_file, prefix): with open(output_file, 'r') as f: for line in f: if line.startswith(prefix): return True return False def start_vm(cmd, output_file): with open(output_file, 'w') as f: process = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=f) while True: time.sleep(1) if process.poll() is not None: if has_line_starting(output_file, 'botocore.exceptions.ClientError: An error occurred (VcpuLimitExceeded) '): return None return process if has_line_starting(output_file, 'Started VM '): return process COLOR_SUCCESS = '\033[92m' COLOR_FAILURE = '\033[91m' COLOR_NORMAL = '\033[0m' def run_tests(): distro_files = glob.glob('release-process/distro-configs/*.json') pending = [] for distro_file in sorted(distro_files): distro_name = os.path.basename(distro_file).rsplit('.', 1)[0] with open(distro_file, 'r') as f: distro_config = json.load(f) archs = distro_config['archs'] if 'archs' in distro_config else ['x86_64'] for arch in sorted(archs): cmd = ['release-process/test-system.py', '--keep-vm-on-error', '--git-revision', 'release', distro_file, arch, args.keypair_pem_file] generators = dist_packaging.get((distro_name, arch)) if generators is not None: cmd += ['--dist-files-dir', dist_dir, '--cpack-generators', generators] pending.append((distro_name, arch, cmd)) running = [] fail_count = 0 while pending or running: while pending: distro_name, arch, cmd = pending[0] output_file = output_file_name(distro_name, arch) process = start_vm(cmd, output_file) if process: pending.pop(0) running.append((distro_name, arch, process)) print('Started %s %s' % (distro_name, arch)) else: break # If no exits are seen after 60 seconds, try to launch a new VM anyway. 
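        # (One scan per second: look through `running` for a child that has
        # exited; if none exits within 60 scans, fall through so the outer
        # loop attempts another start_vm() anyway, in case VCPU quota has
        # been freed some other way.)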
for i in range(60): ready_index = None for running_index, (distro_name, arch, process) in enumerate(running): if process.poll() is not None: ready_index = running_index break if ready_index is not None: distro_name, arch, process = running.pop(ready_index) output_file = output_file_name(distro_name, arch) vm_kept = has_line_starting(output_file, 'VM kept; ') if process.returncode: print('%sTests failed%s: see %s%s' % (COLOR_FAILURE, ' (VM kept)' if vm_kept else '', output_file, COLOR_NORMAL), file=sys.stderr) fail_count += 1 else: print('%sTests succeeded for %s %s%s' % (COLOR_SUCCESS, distro_name, arch, COLOR_NORMAL)) break time.sleep(1) print('%d failures total' % fail_count) if fail_count: sys.exit(1) else: print('Dist files left in %s' % dist_dir) prepare_branch() prepare_dirs() run_tests() rr-5.7.0/release-process/rr-testing-cloud-formation.json000066400000000000000000000007041450675474200233520ustar00rootroot00000000000000{ "AWSTemplateFormatVersion" : "2010-09-09", "Resources": { "SecurityGroup": { "Type": "AWS::EC2::SecurityGroup", "Properties": { "GroupDescription": "rr testing security group", "GroupName": "rr-testing", "SecurityGroupIngress" : [ { "IpProtocol": "tcp", "FromPort": 22, "ToPort": 22, "CidrIp": "0.0.0.0/0" } ] } } } } rr-5.7.0/release-process/rr-testing.sh000066400000000000000000000064701450675474200177150ustar00rootroot00000000000000# Bash script to build rr and run tests. # # Requires variables and functions to be set. See test-system.py. # $git_revision : git revision to check out, build and test # $staticlibs : TRUE or FALSE to build with static libs # $build_dist : 1 if we should build dist packages, 0 otherwise # $test_firefox : 1 to run firefox tests, 0 to skip # $test_libreoffice : 1 to run libreoffice tests, 0 to skip # $ctest_options : options to pass to ctest, e.g. to exclude certain tests # $cpack_generators : CPack generators to build dist # setup_commands : function to setup environment, e.g. 'apt update' # install_build_deps : function to install dependencies required to build rr # install_app_test_deps : function to install dependencies required by tests set -e # default to exiting on error uname -a setup_commands install_build_deps install_app_test_deps & # job %1 # Free up space before we (re)start rm -rf ~/rr || true git clone https://github.com/rr-debugger/rr ~/rr cd ~/rr git checkout $git_revision rm -rf ~/obj || true mkdir ~/obj cd ~/obj cmake -G Ninja -DCMAKE_BUILD_TYPE=RELEASE -Dstaticlibs=$staticlibs -Dstrip=TRUE -DCPACK_GENERATOR=$cpack_generators ../rr ninja # Enable perf events for rr echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid # Enable ptrace-attach to any process. This lets us get more data when tests fail. echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope rm -rf /tmp/rr-* || true ctest -j`nproc` --verbose $ctest_options echo "For some reason I cannot figure out, bash drops the first four characters from the line following ctest"
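# To reproduce a single failing test from this run by hand, something like
# `ctest --verbose -R <testname>` from ~/obj should work; ctest's -R option
# selects tests by a regex over their names.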
# Make sure that install has finished before running tests wait %1 rm -rf ~/.local/share/rr/* || true function xvnc-runner { CMD=$1 EXPECT=$2 rm -f /tmp/xvnc /tmp/xvnc-client /tmp/xvnc-wininfo /tmp/xvnc-client-replay || true Xvnc :9 > /tmp/xvnc 2>&1 & for retries in `seq 1 60`; do if grep -q "Listening" /tmp/xvnc; then break fi if [[ $retries == 60 ]]; then echo FAILED: too many retries of $CMD exit 1 fi sleep 1 done DISPLAY=:9 ~/obj/bin/rr $CMD > /tmp/xvnc-client 2>&1 & for retries in `seq 1 60`; do DISPLAY=:9 xwininfo -tree -root > /tmp/xvnc-wininfo 2>&1 if grep -q "$EXPECT" /tmp/xvnc-wininfo; then break fi if [[ $retries == 60 ]]; then echo FAILED: too many retries of $CMD exit 1 fi sleep 1 done # kill Xvnc kill -9 %1 # wait for $CMD to exit. Since we killed the X server it may # exit with a failure code. wait %2 || true ~/obj/bin/rr replay -a > /tmp/xvnc-client-replay 2>&1 || (echo "FAILED: replay failed"; exit 1) diff /tmp/xvnc-client /tmp/xvnc-client-replay || (echo "FAILED: replay differs"; exit 1) echo PASSED: $CMD } if [[ $test_firefox == 1 ]]; then rm -rf /tmp/firefox /tmp/firefox-profile || true mkdir /tmp/firefox-profile ( cd /tmp; curl -L 'https://download.mozilla.org/?product=firefox-latest&os=linux64&lang=en-US' | tar -jxf - ) xvnc-runner "/tmp/firefox/firefox --profile /tmp/firefox-profile $HOME/rr/release-process/test-data/test.html" "rr Test Page" fi if [[ $test_libreoffice == 1 ]]; then rm -rf ~/.config/libreoffice || true xvnc-runner "libreoffice $HOME/rr/release-process/test-data/rr-test-doc.odt" "rr-test-doc.odt" fi if [[ $build_dist != 0 ]]; then ninja package rm /tmp/dist || true ln -s ~/obj/dist /tmp/dist fi rr-5.7.0/release-process/test-data/000077500000000000000000000000001450675474200171465ustar00rootroot00000000000000rr-5.7.0/release-process/test-data/rr-test-doc.odt000066400000000000000000000177301450675474200220310ustar00rootroot00000000000000PK4ToH^Æ2 ''mimetypeapplication/vnd.oasis.opendocument.textPK4ToHS‘ÆjjThumbnails/thumbnail.png‰PNG  IHDRµÂýÇé0PLTE„„„œœœ¦¦¦®®®´´´½½½ÅÅÅËËËÓÓÓÜÜÜâââóóóþþþÿÿÿ'µ#õIDATxÚíÒÁ „0AcŒ÷0»ù§ËŸÿ=,UGPM«kÔÔÔÔÔÔÔÔÔÔÔÔŸ~=*Ÿšý®\ûlg?zÖlã8÷QXWÚqÖþ’Ž:ùù„¸=IßߣxPÜ7ÏרaBCŠ›¼.Ã*T#á0»z²}Í4w""<-iˆ³^¨ÿ ²Ÿ]‘†QCçL #éM5J¡Ê@A¡¨:‹Øãù «ý9Ñy:(vå‹j¨¦-g& Ý<ƒB†Æãku³üHÒH6B'wÀÈÞãq_«¼¨ü^œ-›;ß6ìß>*‚{^/&~)ç¦ä€TcáÒÊ€qã Ú—w‡þ[ˆÆØ®:œ){By™ø{±’ÏÝ÷wû´×êŽgäÎæ™›¸GÏ­Ï£Ôí¢„GÄ{›PK{9œŽ-PK4ToH styles.xmlíZK¯Û¸Þ÷WLw´-?âkOœY¤(f€$fÒM7Z¢-6”(”ùõ=$E‰’%_åÞxÐÍâæyðð;’zûó9e£#’òl„ãi0"YÄcš¶Á??ÿ=?¿ûË[¾ßÓˆlb)É’êˆp&7–¸ ‘m8–Tn2œ¹Qцç$sBŸ{c¦²#FÙPqÃìK+rVC…5oCï†Ïl˜}éXàÓPaÍ ˜úâ{>Tø,Úsñ4ÇŠ¶¬83š}Ù‰Rùf29NãÓ|ÌÅa®×뉡VG_^f¸âhBÑ“ÉI8'Ž7% µOóú&eEº#b04Xá+¯ÊãapD=ÐD ƒcÃ07Ý;‡»wû²)VIOž&hþ|üPÇ‚H‡Î¥yPE‚惗i¹}yÎyeª° jÌM§‹‰ýíqŸn²ŸUDxìÑMö³¨Bœ§] _8DŽ:L·Ð‹îÕ¼œ’s¡*Cöà  3«Ò+Q)ëO/Mu¬Ǭ`Î|©ŽŽ”œ~•ó¶Ö-˜2ôœˆaòëÔMp:Ñ‚ØßóÍ T!ž+“·Gúw)"ók%Qè¼ ¦ãy”v/-¢‚ÆAŸEÌq]J¸ _¹®=šuöt“ù¨]³ÂÆ4Tëk‡Îdë8Q• Û7ï1“^ÄåX`ƒwmCÒüŠë9 iL¸eÅ,O°›À˜±CO .¢‘rÝhÛRƒ8HíAE³˜èÝXŸ/üÅ8#°·AÜð\ê¨ë7»b×v_­¦`È´WÍäe°(Q†Q}…‚ÉÐ%ý ôp–+3Æpv(ð†Hf"Øß”€ˆùô¯ !¢`B_ˆÈÌê:çDЦàì™d¯E´N$œŽ—y…º³ÈQ¿&ŽRšæï?]¡ÛjFα5yÅj±hM^QÚž¾"ýú)¨=Ú(CªFÁÍX—$—'\,£˜BeÈô$ÐN,Ã:a›Y“˜u¶þ?´ÿwBÛ<âJZ;I1Í>Šº˜œ]1å…LZ,¯È{Rðj+#~HÙK:Mt –Åp.u€¿vb$ø©59Œ´ö !9Rü@T¢otB>7±?aÙ5AzÅXÄAoÝpÎcXJ0r«Î´k}¿{)Þ«ª 3ÔmJ¦ÓØgø ̦ìx|é2ë¹ —bå Ëõþ¿˜™ý¿ßq¥ôIZƒp¦IödAÀÃÿfí·r‚i2Ó8`vÂù\-ê)4¦ßwuæþí~»¤-ê4~iU*(…^Ýe;E}½¶£×Kê«kVìJ“*æžqj½«©8ú×S]½¢:ÝŸIó®Ô½5gøâ%×È'¿&u_œ•½¹X ÍHÓU$„¥·¦éÃQúÝÆK–£01­r@1hë웂5|y´Fæ=6§Ãï…ÀßX²Ëºüš’mž:ìE¼9MÉ&Å»§Ÿö—ñžfÐŽi èŠá@ ÇõÁN¶˜•Ôá»Á°ÆÎ³±¢—VŽ¢_á{þŽ1D¾›1ôüýß—©Æ|^({%põ?,%h12r$¬d·èXHÕt)Ò/)Šg…¬.b¥hZŸÄ%58øÊäqwxd•C¦ož»Ô¶XJÝfpm5?‘í.¶œB?x“WgE7¿.«7®RORs}² O)k#{U‹Ô8w.¨}xDzöPH‡ãiø¦iŸÒ@ÚîŽôüÁž­z€®-œÍøÝa^<ÌËÙ¢çšÒÚîŽôòÁ^­žz®)-¤-áîH¿y(¤gãé¼{;ô) ¤áîH¯ 
éÙS÷vèSZH[ÂÝ‘~z0¤—ÝeÚ#´p^þ)Ezý`0¯Ö}8×”Жpw¤ÃéCA=OÝû¡Oi@í¯ºIòñϸ"Ž§ÙžŠòî¹" ò¾ç\éß]®ËÛÇý#fчl;è¥yÆóeìÉ\¿ói}îã(½Þá’,î3vèÔkDj º¦é½W°ŸD˜g†õº~µìB§TR£ }[Òh óªnþ¼¯IŒ¶ú#ýô:i„Á݈ÀÛøÞm\|ÌÓ0è`jÝpʉÆú›ÍÛ¦a„†à®Xgëñªw…å B\PýÝYéj.”ÀTí[¾ž¾î.¥9(Jƒú^‰[ßVØøC)>WkÑ÷ÏõWF%ƒ$¹Sg±˜Â ö©žÄ½Q£•~ÍNü×/Á],uþmÉ­.»püïB*66츀$vÞYþX?²ÚÏ{¦æ_à Òåg·Ú„`ý”i~L|¼ÁkEuD^‡`IH±¬tT³•ƒZÓÍ×Hßf/t½Lh©Ÿt‘ÿî?PKÿK¾guÑ/PK4ToHmeta.xml“O›0ÅïýÈÝ+ É‚¬ÔCO+µRSio‘cÏ·`#c–ôÛ×üKÙl=òæ7~oÆ&º4µ÷¦“Z( BäâZHUèç᫟¢§òS®__%*4ïPÖoÀ2ϵªŽÎ¥õFQÍ:ÙQÅè¨åT· Öº¥éd4+—Zªß:[ÛRŒ‡a†8ЦÂQ–exª®¨àW®íM=Q‚c¨atèpDxeÇ„ÿjd·‘´ÖW£ŸCOv$ <¯te„¨ï àØ»„Ì2ÿMÂðyËø›…T®Ûc”ù†`Ö¾k†’„ÑÞc?ÚHL㘆A=&$!q–ã;¹àô~kœiLHºÏ’ã›]AHë.Þ½™Î*¿bòc1øP|ßÃÿðº2º¡yf¯Ï§³îˆÎJîMºe§|®{e änaeê¢>ýnoÕvCFWͰʰö|[´«FŸÍ-˜µ-å:œ¥…®eÜyÜr)ÂËl(p‹Ñ¦|–'ߦ+Å» vyx–ª¿_ÒýqŸxàØ=N„ÃðáK/ká“eÿÎËñ»÷ïý‹å_PK175¢½ÉPK4ToH settings.xml½ZQsÚ8~¿_‘á=%$mš0  ¥¥%Á´™ë›°¬CÖz$9À¿¿• iJð•bëžH°½+ïî÷í·7V‰8y¥9ÊÛZãÍYídˆ—óÛÚ·Iïôªö¡õ× Îf<„f„a–€4§Œ¡[ô =.u3¿|[Ë”l"Ó\7%K@7MØÄäö±æË»›ÎYþÍJp¹¸­ÅƤÍz}¹\¾Y^¼A5¯7®¯¯ëîêöÖåŒÏu•ßýÒ">;²ä‹qÎÎÏÎÞÖóÿk'›E¾Íy­µÃöõ[7ùÇ)7ØØœl¾¶K»­‘Ëæ‡åsÔjûžûõ™ït[›`ZÛ^1ë”®”óZëì¦þÚÄáf03>ì>òÈÄ{ _\¾{Û(gü3ðy¼Ùçwóã¬1.ÇQA7frzÇÃQ“µ–Qç£/; —î1‚"ë3&ôÁæO–žrÁ ¢×ÁÚ_`Zò~´³Tm·Á¶µ|d°­ÝÂÚ»~ÿ¾Dù!åüúêêX«šOTg¶rd;«ãBˆ/®KÙî 1˜âïHtÿ@L&dj·ØbT¦ Ø3ÓE‘%rÓUYï .*õë¸ôXhPí_{ãìÈÕ÷uBQOÑG,}Ï—/É¥èò†¯öß@½ñðnš‘)f¨7ÿI[ƒ™‘BR<$ÍÙ·„1b^I¾~²Ýaáb®0“»$]•“.R2Qøz‡¶”h\úöÀ§:ýOŠ¥1}-ßѬÏ÷Øêá587¸~p¡¤ z “L¶Ûm*+÷ ¬ÌH°bì²UnÚi*Ö#¦ØÜfõž©EUÂ̲d .&ž^n v€B”•°?aÓ!§×™séÁþG MÔ#Öh ãA¦ûØä?RV‚„ áÁÏOž³ù·ñºSl騾zg]ÁÓ¢M¦Š¡]ÂCO 3†‘˜zÀÄ%Þ‡—orJãÒ¹ÙćWaÁZÓ_u¶ÉLàÒÓNl˜|Ñx@‘ð§^0h1×e© I{Q;º¼ô ‚¼ÀÇ$5ë.a¡X}À;LƒÄÛ‚Ï¥ÝÇ©ÞM;Êuïp6#áè#/}=`Sw›­(_­ T)…0ÌŒô€œø›ƒˆ´ÏfÚŸKTÐãJ›yíÓ( M_útù-˜+&¤‚þöò‰Ÿ…G¤©—_|(8GáDwŒº¯TIåI¦ë¶Œ(*r¡I¨Ù|w™3áô¾LkŠhŠ{ õ ÿ25,©ÑȯHå) ý”mÀžà“À){& àÂSié1ج<ÁsHV‘±æ»ê™òTk5Îß_½»8rkî—9~<øâX×òþsÎ+;ÖPí* >OÜô™º¥Ué>^ rªÜQÔm¦<”Q϶¨vf0§õêä$â³!í Wõ¯ÑÉEH<ØÅ$¥!Æž^U¾½j9ê{~26”]ÚǨ}ã3㛆a‰ÏžZ¦u›ºÇ1ŸÇ§Q™ícÇjï d[s&G™¤9ÓcO´›$ †Óˆ#G¨¹õä©geSc«®'ÖÔúCª-ˆÚî?0Ú h‚Ov,ñ0Ä‘¯+ªÉÄXä±™RT{[f?ÌTx¼k —]uÓäÙe2„­™’èùÝfL):Îëw»Ã”zšäŠü„ðRª“årj«íŽr¡ENŸw4u_¸öÑ¡7Û¤” ‘áÿ+¬wÝPÍ]¾ípÉÔú°eL­¤ßà/ˆ••§Ï“| $ ù³†’(øÌG¸ÈÑq7åõj`ÖôÞÁŒeÂG¦í7Êâ•#OhÝf •o`0Ýö‘êÓ|‡h¾dÚðÙÚ’~ä&¾g2c¢cw3=Ä̹ƒc¡Ø6FÙy•$ æ}(š_™žZÂd´G¹‚;R› Ecb1”âÔ«ŠY—¥v‡Ü²?aQ¥‘þ·€•;£Ù}ßiز =*hëg±½=ŠnU$|Ü£—½ým_|ÁÁ^¦_×1¬«Š~ àøë¯~@W/úiaë_PK×éhœ(PK4ToH'Configurations2/accelerator/current.xmlPKPK4ToHConfigurations2/toolpanel/PK4ToHConfigurations2/floater/PK4ToHConfigurations2/menubar/PK4ToHConfigurations2/images/Bitmaps/PK4ToHConfigurations2/popupmenu/PK4ToHConfigurations2/progressbar/PK4ToHConfigurations2/toolbar/PK4ToHConfigurations2/statusbar/PK4ToH manifest.rdfÍ“Ínƒ0„ï<…eÎØ@/r(ʹjŸÀ5†X/òšÞ¾Ž“VQ¤ªêŸÔã®F3ߎ´›íaÈ‹²¨ÁT4c)%ÊHhµé+:».¹¥Û:ÚØ¶+šñjƒ¥Ÿ*ºwn*9_–…-7 lϳ¢(xšóPK4ToH^Æ2 ''mimetypePK4ToHS‘ÆjjMThumbnails/thumbnail.pngPK4ToH{9œŽ- ícontent.xmlPK4ToHÿK¾guÑ/ ´styles.xmlPK4ToH175¢½Éameta.xmlPK4ToH×éhœ( Tsettings.xmlPK4ToH'œConfigurations2/accelerator/current.xmlPK4ToHóConfigurations2/toolpanel/PK4ToH+Configurations2/floater/PK4ToHaConfigurations2/menubar/PK4ToH—Configurations2/images/Bitmaps/PK4ToHÔConfigurations2/popupmenu/PK4ToH Configurations2/progressbar/PK4ToHFConfigurations2/toolbar/PK4ToH|Configurations2/statusbar/PK4ToH´÷hÒƒ ´manifest.rdfPK4ToHõ#‡>óMETA-INF/manifest.xmlPKpRrr-5.7.0/release-process/test-data/test.html000066400000000000000000000001341450675474200210110ustar00rootroot00000000000000

<html> <head> <title>rr Test Page</title> </head> <body> Hello! </body> </html> rr-5.7.0/release-process/test-system.py000077500000000000000000000205401450675474200201370ustar00rootroot00000000000000#!/usr/bin/python3 import argparse import boto3 from datetime import datetime import json import pathlib import subprocess import sys import time parser = argparse.ArgumentParser() parser.add_argument('distro_config_json') parser.add_argument('architecture') parser.add_argument('keypair_pem_file') parser.add_argument('--cpack-generators', default='TGZ') parser.add_argument('--git-revision', default='master') parser.add_argument('--timeout', default=1200) # 20 minutes parser.add_argument('--machine-type') parser.add_argument('--keep-vm', action='store_true') parser.add_argument('--keep-vm-on-error', action='store_true') parser.add_argument('--dist-files-dir') args = parser.parse_args() class Ec2Vm: def __init__(self, machine_type, architecture, distro_config, keypair_pem_file): """Start an EC2 VM using the latest available AMI. If this completes without throwing an exception, then terminate() should be called eventually (unless you want to keep the VM running).""" self.distro_name = distro_config['name'] self.user = distro_config['user'] self.keypair_pem_file = keypair_pem_file self.ssh_ready = False self.ec2 = boto3.resource('ec2') self.ec2_client = boto3.client('ec2') response = self.ec2_client.describe_images(Owners=[distro_config['ami_owner']], Filters=[ {'Name': 'architecture', 'Values': [architecture]}, {'Name': 'name', 'Values': [distro_config['ami_name_pattern']]} ], MaxResults=1000) images = response['Images'] if len(images) >= 1000: raise Exception('Too many AMIs match filter') if len(images) == 0: raise Exception('No AMIs match filter') latest_image = sorted(map(lambda image: ( datetime.strptime(image['CreationDate'], '%Y-%m-%dT%H:%M:%S.%f%z'), image ), response['Images']))[-1][1] ami = latest_image['ImageId'] block_device = None for mapping in latest_image['BlockDeviceMappings']: if 'Ebs' in mapping: if block_device is not None: raise Exception('Multiple block devices found') block_device = mapping['DeviceName'] if block_device is None: raise Exception('No block device found') print('Found AMI %s created %s with block device %s'%(ami, latest_image['CreationDate'], block_device), file=sys.stderr) tags = [{ 'ResourceType': 'instance', 'Tags': [{'Key': 'Name', 'Value': "rr-test %s %s"%(self.distro_name, architecture)}] }] response = self.ec2.create_instances(ImageId=ami, InstanceType=machine_type, KeyName='rr-testing', MinCount=1, MaxCount=1, BlockDeviceMappings=[{'DeviceName': block_device, 'Ebs': {'VolumeSize': 32}}], InstanceInitiatedShutdownBehavior='terminate', SecurityGroups=['rr-testing'], TagSpecifications=tags) self.instance = response[0] print('Starting VM %s "%s"'%(self.instance.id, self.distro_name), file=sys.stderr) def wait_for_ssh(self): """Wait until the instance is ready to accept ssh commands.""" self.instance.wait_until_running() self.instance.reload() print('Started VM %s "%s" at %s'%(self.instance.id, self.distro_name, self.instance.public_ip_address), file=sys.stderr) for retries in range(60): result = subprocess.run(self.ssh_command() + ['true'], stdin=subprocess.DEVNULL, stderr=subprocess.PIPE) if result.returncode == 0: self.ssh_ready = True return if (b'Connection refused' not in result.stderr and b'reset by peer' not in result.stderr and b'Connection timed out' not in result.stderr): raise Exception('SSH connection failed:\n%s'%result.stderr.decode('utf-8')) time.sleep(1) raise Exception('Too many retries, cannot connect via SSH')
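    # Typical Ec2Vm lifecycle, as driven by the code at the bottom of this
    # script (sketch, with a try/finally around the middle steps):
    #   vm = Ec2Vm(machine_type, args.architecture, distro_config, pem_file)
    #   vm.wait_for_ssh()                      # blocks until sshd answers
    #   vm.ssh(['/bin/bash', '-s'], script)    # run the test script over ssh
    #   vm.terminate()                         # unless --keep-vm was passed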
def ssh(self, cmd, input): """Run `cmd` (command + args list) via SSH and wait for it to finish. Command stdout and stderr are echoed to our stdout/stderr. If the command fails, throws an exception with the exit status. Returns nothing.""" full_cmd = self.ssh_command() + cmd print('Running %s'%full_cmd, file=sys.stderr) process = subprocess.Popen(full_cmd, stdin=subprocess.PIPE) process.communicate(input=input, timeout=args.timeout) if process.returncode != 0: raise Exception('Command failed with %d'%process.returncode) def scp_from(self, options, src, dst): """Copies files from remote `src` to local `dst`.""" full_cmd = ['scp'] + self.ssh_options() + options + ['%s:%s'%(self.ssh_dest(), src), dst] print('Running %s'%full_cmd, file=sys.stderr) subprocess.check_call(full_cmd) def ssh_command(self): return ['ssh'] + self.ssh_options() + [self.ssh_dest()] def ssh_options(self): return ['-i', self.keypair_pem_file, '-o', 'StrictHostKeyChecking=no', '-o', 'BatchMode=yes', '-o', 'ConnectTimeout=5', '-o', 'IdentitiesOnly=yes'] def ssh_dest(self): return '%s@%s'%(self.user, self.instance.public_ip_address) def terminate(self): response = self.instance.terminate() if response['ResponseMetadata']['HTTPStatusCode'] != 200: print('Terminating VM %s failed: %s'%(self.instance_id, response), file=sys.stderr) self.instance.wait_until_terminated() with open(args.distro_config_json, 'r') as f: distro_config = json.load(f) with pathlib.Path(__file__).with_name('rr-testing.sh').open('rb') as f: rr_testing_script = f.read() def get_config_lines(config_key): entry = distro_config.get(config_key) if isinstance(entry, str): return [entry] if isinstance(entry, list): return entry if entry is None: return [] raise ValueError('Invalid config entry %s: %s' % (config_key, entry)) def get_config_lines_arch(config_key): return get_config_lines(config_key) + get_config_lines('%s_%s'%(config_key, args.architecture)) def config_script_function(config_key): lines = get_config_lines_arch(config_key) return ('function %s {\n%s\n}' % (config_key, '\n'.join(lines))) machine_type = args.machine_type if not machine_type: if args.architecture == 'x86_64': machine_type = 'c5.9xlarge' elif args.architecture == 'arm64': machine_type = 'c6g.8xlarge' vm = Ec2Vm(machine_type, args.architecture, distro_config, args.keypair_pem_file) success = False try: vm.wait_for_ssh() exclude_tests = get_config_lines_arch('exclude_tests') if args.architecture == 'arm64': # Currently AWS Graviton instances have a high failure rate on the `rseq` test # because of missed timer interrupts exclude_tests += ["rseq.*"] ctest_options = [] if exclude_tests: ctest_options = ['-E', '|'.join(exclude_tests)] full_script = '\n'.join( [ 'set -x', # echo commands config_script_function('setup_commands'), config_script_function('install_build_deps'), config_script_function('install_app_test_deps'), 'git_revision="%s"'%args.git_revision, 'staticlibs=%s'%('TRUE' if distro_config.get('staticlibs', True) else 'FALSE'), 'build_dist=%d'%(1 if args.dist_files_dir is not None else 0), # Firefox doesn't have release tarballs for Aarch64 'test_firefox=%d'%(1 if args.architecture == 'x86_64' else 0), # libreoffice uses STREX 'test_libreoffice=%d'%(1 if args.architecture == 'x86_64' else 0), 'ctest_options="%s"'%' '.join(c for c in ctest_options), 'cpack_generators="%s"'%args.cpack_generators ]).encode('utf-8') + b'\n' + rr_testing_script vm.ssh(['/bin/bash', '-s'], full_script) if args.dist_files_dir is not None: vm.scp_from(['-r'], '/tmp/dist/*', args.dist_files_dir) success = 
True finally: if (not success and args.keep_vm_on_error) or args.keep_vm: if vm.ssh_ready: print('VM kept; connect with: %s'%(' '.join(vm.ssh_command())), file=sys.stderr) else: print('VM %s still starting up'%vm.instance.id) else: vm.terminate() rr-5.7.0/rr.spec000066400000000000000000000025371450675474200134720ustar00rootroot00000000000000Buildroot: @CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM/@CPACK_PACKAGE_FILE_NAME@ Summary: Lightweight tool for recording and replaying execution of applications (trees of processes and threads) Name: @CPACK_PACKAGE_NAME@ Version: @CPACK_PACKAGE_VERSION@ Release: @CPACK_RPM_PACKAGE_RELEASE@ License: @CPACK_RPM_PACKAGE_LICENSE@ Group: Development/Debuggers Vendor: @CPACK_PACKAGE_VENDOR@ Prefix: @CPACK_PACKAGING_INSTALL_PREFIX@ @CPACK_RPM_PACKAGE_REQUIRES@ %define _rpmfilename @CPACK_PACKAGE_FILE_NAME@.rpm %define _unpackaged_files_terminate_build 0 %description rr is a lightweight tool for recording and replaying execution of applications (trees of processes and threads). For more information, please visit http://rr-project.org # This is a shortcutted spec file generated by CMake RPM generator # we skip _install step because CPack does that for us. # We do only save CPack installed tree in _prepr # and then restore it in build. %files %defattr(-,root,root,-) @CPACK_PACKAGING_INSTALL_PREFIX@/lib64/* @CPACK_PACKAGING_INSTALL_PREFIX@/bin/rr @CPACK_PACKAGING_INSTALL_PREFIX@/bin/rr_exec_stub* @CPACK_PACKAGING_INSTALL_PREFIX@/bin/signal-rr-recording.sh @CPACK_PACKAGING_INSTALL_PREFIX@/share/rr/*.xml %changelog * Tue Jun 25 2013 Chris Jones - - Initial build. rr-5.7.0/scripts/000077500000000000000000000000001450675474200136535ustar00rootroot00000000000000rr-5.7.0/scripts/checkpoint-visualizer.html000066400000000000000000000053271450675474200210720ustar00rootroot00000000000000
<table> <tr><th>Checkpoint</th><th>Time</th><th>Time to next</th></tr> </table>
rr-5.7.0/scripts/reformat.sh000077500000000000000000000001211450675474200160230ustar00rootroot00000000000000#!/bin/sh find src -regex '.*\.\(c\|h\|cc\)$'|xargs clang-format -style=file -i rr-5.7.0/scripts/rr-collect-symbols.py000077500000000000000000000164701450675474200177720ustar00rootroot00000000000000#!/usr/bin/env python3 import errno import glob import os import re import shutil import subprocess import sys import tempfile from urllib.request import urlretrieve from urllib.error import HTTPError, ContentTooShortError # Usage: rr-collect-symbols.py <trace-dir> [<url> | <path>] # # Given a <url>, downloads the zip/.tar.zst file at <url>, uncompresses it, # runs "gunzip" on any .gz files, and for any ELF files found whose build-ids # match the build-id of an ELF file in the trace, moves them into the trace. # # Given a <path>, which must contain a .build-id directory with the usual # structure (e.g. as Ubuntu and Fedora create under /usr/lib/debug), searches # the directory tree for any ELF files whose build-ids match the build-id of # an ELF file in the trace and copies them into the trace. <path> defaults to # "/usr/lib/debug", which will grab any available system debuginfo files # in Ubuntu and Fedora at least. # # This script assumes that the trace-dir has been packed via `rr pack` so all # relevant files actually appear in the trace-dir. # It also assumes rr is on the PATH. # # The debuginfo files are placed in the trace under a "debug" subdirectory, # in a ".build-id" subdirectory with the usual structure. # # If a debuginfo file contains a .gnu_debugaltlink section then we also # attempt to find the referenced file and copy it into the trace with the # same file name as the .debug file, but with a .sup suffix. if len(sys.argv) < 2: print("Usage: rr-collect-symbols.py <trace-dir> [<url> | <path>]", file=sys.stderr) sys.exit(1) trace_dir = sys.argv[1] if len(sys.argv) < 3: source = "/usr/lib/debug" else: source = sys.argv[2] rr_buildid = subprocess.Popen(["rr", "buildid"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) def build_id_for(file): global rr_buildid rr_buildid.stdin.write(("%s\n"%file).encode('utf-8')) try: rr_buildid.stdin.flush() except BrokenPipeError: print("Can't write to rr, termination code %s"%rr_buildid.returncode, file=sys.stderr) sys.exit(2) return rr_buildid.stdout.readline().rstrip().decode('utf-8') altref_regex = re.compile(rb"^\s+\[\s*0\]\s+(.*)") def find_altref(file): proc = subprocess.Popen(["readelf", "-p", ".gnu_debugaltlink", file], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) try: for line in proc.stdout: m = altref_regex.match(line) if m: return m.group(1).rstrip() finally: proc.wait() return None def find_altref_for_trace_file(trace_file, altref): proc = subprocess.Popen(["rr", "filename", trace_file], stdout=subprocess.PIPE) try: for line in proc.stdout: file = line.rstrip() altref_file = os.path.join(os.path.dirname(file), altref) if os.path.isfile(altref_file): return altref_file finally: proc.wait() return None def mkdir_p(path): try: os.makedirs(path) except OSError as exc: if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise # 'dst' must be a complete file name, not a directory. def copy_file(src, dst): try: # Remove the destination file in case it's a hard link # or owned by someone else. os.remove(dst) except: pass shutil.copy(src, dst) # 'dst' must be a complete file name, not a directory def create_link(src, dst):
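    # os.symlink(src, dst) makes dst a symlink pointing at src; the callers
    # below pass src as a path relative to dst's directory, so the link
    # resolves in place inside the trace.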
os.remove(dst) except: pass os.symlink(src, dst) def collect_trace_build_ids(): ret = {} for file in glob.iglob("%s/mmap_*"%trace_dir): build_id = build_id_for(file) if build_id: ret[build_id] = True altref = find_altref(file) if altref: altref_file = find_altref_for_trace_file(file, altref) if not altref_file: print("WARNING: Can't find alt file %s for %s"%(altref, file)) continue dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2]) mkdir_p(dir) copy_file(altref_file, "%s/%s.sup"%(dir, build_id[2:])) return ret trace_build_ids = collect_trace_build_ids() def collect_archive(url): is_tar_zst = url.endswith(".tar.zst") tmp_dir = tempfile.mkdtemp(dir=trace_dir) if is_tar_zst: tmp_file_name = "%s/archive.tar.zst"%tmp_dir else: # Assume its a ZIP tmp_file_name = "%s/archive.zip"%tmp_dir try: (file, headers) = urlretrieve(url, tmp_file_name) except (HTTPError, ContentTooShortError) as exc: print("Failed to load archive %s: %s"%(url, exc), file=sys.stderr) sys.exit(2) if is_tar_zst: subprocess.check_call(["tar", "-C", tmp_dir, "-I", "zstd", "-xvf", file]) else: subprocess.check_call(["unzip", "-d", tmp_dir, file]) os.remove(file) for root, dirs, files in os.walk(tmp_dir): for name in files: file = os.path.join(root, name) if file.endswith(".gz"): subprocess.check_call(["gunzip", file]) file = file[:-3] build_id = build_id_for(file) if build_id and build_id in trace_build_ids: dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2]) mkdir_p(dir) dst = "%s/%s.debug"%(dir, build_id[2:]) os.rename(file, dst) else: os.remove(file) shutil.rmtree(tmp_dir) def collect_filesystem(path): for root, dirs, files in os.walk(path): for name in files: file = os.path.join(root, name) if not os.path.islink(file): build_id = build_id_for(file) if build_id and build_id in trace_build_ids: dir = "%s/debug/.build-id/%s"%(trace_dir, build_id[:2]) mkdir_p(dir) copy_file(file, "%s/%s.debug"%(dir, build_id[2:])) altref = find_altref(file) if altref: altref = altref.decode('utf-8') altref_file = os.path.join(os.path.dirname(file), altref) copy_file(altref_file, "%s/%s.sup"%(dir, build_id[2:])) if altref.startswith("../../../.dwz/"): mkdir_p("%s/.dwz"%trace_dir) src = "../debug/.build-id/%s/%s.sup"%(build_id[:2], build_id[2:]) create_link(src, "%s/.dwz/%s"%(trace_dir, altref[14:])) elif altref.startswith("../../.dwz/"): mkdir_p("%s/debug/.dwz"%trace_dir) src = "../.build-id/%s/%s.sup"%(build_id[:2], build_id[2:]) create_link(src, "%s/debug/.dwz/%s"%(trace_dir, altref[11:])) elif altref.startswith("../.dwz/"): mkdir_p("%s/debug/.build-id/.dwz"%trace_dir) src = "../%s/%s.sup"%(build_id[:2], build_id[2:]) create_link(src, "%s/debug/.build-id/.dwz/%s"%(trace_dir, altref[8:])) if re.search("^[^:/]+:", source): collect_archive(source) else: collect_filesystem(source) rr_buildid.terminate() rr-5.7.0/scripts/rr_completion000066400000000000000000000015461450675474200164600ustar00rootroot00000000000000# vi:syntax=sh # # completion script for rr commands (to be sourced) _rr_subcmd_completion() { local cmd=$1 local short_opts=$(rr help $cmd | sed -n 's/\s*-\([a-zA-Z]\),.*/-\1/p') local long_opts=$(rr help $cmd | sed -n 's/.*--\([^= ]*\).*/--\1/p') echo "$short_opts" "$long_opts" } _rr_completion() { COMPREPLY=() local rr_commands="$(rr --list-commands | cut -s -d ' ' -f 3)" # completion for rr if [ $COMP_CWORD -eq 1 ]; then COMPREPLY=( $( compgen -W "$rr_commands" -- "${COMP_WORDS[1]}" ) ) return fi # completion for rr 's options local cmd="$(echo "${COMP_WORDS[1]}" | tr -d '[:space:]')" if [ "$(echo $rr_commands | grep -w "$cmd")" 
] ; then COMPREPLY=( $( compgen -W "$(_rr_subcmd_completion "$cmd")" -- "${COMP_WORDS[COMP_CWORD]}" ) ) fi } complete -F _rr_completion rr rr-5.7.0/scripts/signal-rr-recording.sh000077500000000000000000000010621450675474200200610ustar00rootroot00000000000000#!/usr/bin/env bash signal=$1 if [[ "$signal" == "" ]]; then echo "Usage: $0 <signal>" >&2 echo "Sends <signal> to all processes being recorded by rr" >&2 exit 1 fi function signal_descendants { pid=$1 for child in `ps -o pid= --ppid $pid`; do echo Sending $signal to $child kill -s $signal $child signal_descendants $child done } for rr_pid in `pidof rr` ; do if cat /proc/$rr_pid/cmdline | tr '\0' '\n' | head -n2 | tail -n1 | grep -qz '\(^record$\)\|/' ; then signal_descendants $rr_pid fi done rr-5.7.0/scripts/tag-release.sh000077500000000000000000000020021450675474200163750ustar00rootroot00000000000000#!/usr/bin/env bash function fatal { why=$1; echo "[FATAL]" $why >&2 exit 1 } major=$1 minor=$2 patch=$3 ver="$major.$minor.$patch" echo "Preparing for release '$ver' ..." if [[ $major == "" || $minor == "" || $patch == "" ]]; then fatal "Usage: ./tag-release.sh MAJOR MINOR PATCH" fi verfile=CMakeLists.txt echo "Patching $verfile with new version string ..." sed -i "s/rr_VERSION_MAJOR [0-9][0-9]*/rr_VERSION_MAJOR $major/g" $verfile sed -i "s/rr_VERSION_MINOR [0-9][0-9]*/rr_VERSION_MINOR $minor/g" $verfile sed -i "s/rr_VERSION_PATCH [0-9][0-9]*/rr_VERSION_PATCH $patch/g" $verfile echo "Showing diff for $verfile ..." git diff -p -U8 echo -n "Is this what you expected to see? [Y/n] " read ok if [[ $ok != "Y" ]]; then fatal "Oops. Aborting version update by user request." fi echo "Generating git commit ..." git commit $verfile -m "Bump version to $ver." echo "Generating git tag $ver ..." git tag $ver echo "Done! Publish the new version using 'git push --all' or 'git push; git push --tags'." rr-5.7.0/scripts/update-gh-pages.sh000077500000000000000000000013461450675474200171700ustar00rootroot00000000000000#!/usr/bin/env bash function fatal { why=$1; echo "[FATAL]" $why >&2 exit 1 } rev=HEAD if [[ $1 != "" ]]; then rev=$1 fi ver=`git name-rev --name-only --tags $rev` if [[ $ver == undefined ]]; then fatal "No tag found" fi echo "Updating repo ..." git checkout gh-pages || fatal "Failed to checkout gh-pages branch." verfile=index.html echo "Patching $verfile with new version $ver ..." sed -i "s/[^<]*$ver= 0 and ssb_mode & PR_SPEC_PRCTL: mitigated = (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0) == 0) if not mitigated: print('Failed to enable SSB mitigation') else: ssb_status = 'mitigated' else: ssb_status = 'immutable' msrs = [read_msr(cpu) & BIT for cpu in cpus] if all(msr for msr in msrs): if ssb_status in ('mitigated', 'immutable') or args.check: print('Zen workaround in place') else: print('Zen workaround maybe in place.') elif args.reset or args.check: if all(not msr for msr in msrs): print('Zen workaround disabled') elif args.reset: print('Zen workaround somehow not entirely disabled?') else: print('Zen workaround not entirely enabled?') else: print('Zen workaround does not stick. 
Please see https://github.com/rr-debugger/rr/wiki/Zen') rr-5.7.0/scripts/zen_workaround.service000066400000000000000000000021761450675474200203120ustar00rootroot00000000000000# systemd service for AMD Zen `rr` workaround # See https://github.com/rr-debugger/rr/wiki/Zen for more details # To install: # - Save this file as `/etc/systemd/system/zen_workaround.service` # - Download the `zen_workaround.py` script to a secure location, example: # - sudo mkdir -p /usr/share/zen_workaround # - cd /usr/share/zen_workaround # - curl -L https://github.com/rr-debugger/rr/raw/master/scripts/zen_workaround.py | sudo tee -a zen_workaround.py >/dev/null # - chmod +x ./zen_workaround.py # - run `sudo systemctl enable zen_workaround` to enable on startup # - run `sudo systemctl start zen_workaround` to manually start it immediately # - run `systemctl status zen_workaround` to ensure that it completed successfully on your hardware [Unit] Description = Startup script for rr zen workaround [Service] # Step to actually run `zen_workaround.py`. ExecStart =+/usr/share/zen_workaround/zen_workaround.py # Only run this once, report it as "(active)" even after we've exited. Type = oneshot RemainAfterExit = yes [Install] WantedBy = default.target rr-5.7.0/snap/000077500000000000000000000000001450675474200131255ustar00rootroot00000000000000rr-5.7.0/snap/snapcraft.yaml000066400000000000000000000036341450675474200160000ustar00rootroot00000000000000name: rr base: core20 # the base snap is the execution environment for this snap version: git summary: low-overhead record-replay debugging tool description: | rr aspires to be your primary C/C++ debugging tool for Linux, replacing — well, enhancing — gdb. You record a failure once, then debug the recording, deterministically, as many times as you want. The same execution is replayed every time. rr also provides efficient reverse execution under gdb. Set breakpoints and data watchpoints and quickly reverse-execute to where they were hit. * Low overhead compared to other similar tools, especially on mostly-single-threaded workloads * Supports recording and replay of all kinds of applications: Firefox, Chrome, QEMU, LibreOffice, Go programs, ... * Record, replay and debug multiple-process workloads, including entire containers * Works with gdb scripting and [IDE integration](https://github.com/rr-debugger/rr/wiki/Using-rr-in-an-IDE) * [Durable](http://robert.ocallahan.org/2017/06/new-rr-pack-command.html), [compact](http://robert.ocallahan.org/2017/07/selecting-compression-algorithm-for-rr.html) traces that can be [ported](http://robert.ocallahan.org/2017/09/rr-trace-portability.html) between machines * [Chaos mode](http://robert.ocallahan.org/2016/02/introducing-rr-chaos-mode.html) to make intermittent bugs more reproducible grade: stable # must be 'stable' to release into candidate/stable channels confinement: classic apps: rr: command: usr/bin/rr parts: rr: plugin: cmake cmake-parameters: - -DCMAKE_INSTALL_PREFIX=/usr source: . 
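    # Build from this checkout; together with "version: git" above,
    # snapcraft derives the snap's version string from the git revision.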
source-type: git build-packages: - g++ - g++-multilib - gdb - pkg-config - coreutils - python3-pexpect - manpages-dev - ninja-build - capnproto - libcapnp-dev - zlib1g-dev stage-packages: - libcapnp-0.7.0 - zlib1g rr-5.7.0/src/000077500000000000000000000000001450675474200127535ustar00rootroot00000000000000rr-5.7.0/src/AddressSpace.cc000066400000000000000000002472301450675474200156320ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "AddressSpace.h" #include <limits.h> #include <linux/kdev_t.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #include "rr/rr.h" #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "MonitoredSharedMemory.h" #include "RecordSession.h" #include "RecordTask.h" #include "Session.h" #include "Task.h" #include "core.h" #include "log.h" using namespace std; namespace rr { static const uint8_t x86_breakpoint_insn[] = { 0xcc }; // int $3 static const uint8_t arm64_breakpoint_insn[4] = {0x0, 0x0, 0x20, 0xd4}; // brk #0 static const uint8_t *breakpoint_insn(SupportedArch arch) { switch (arch) { case x86: case x86_64: return x86_breakpoint_insn; case aarch64: return arm64_breakpoint_insn; default: DEBUG_ASSERT(0 && "Must define breakpoint insn for this architecture"); return nullptr; } } /** * Advance *str to skip leading blank characters. */ static const char* trim_leading_blanks(const char* str) { const char* trimmed = str; while (isblank(*trimmed)) { ++trimmed; } return trimmed; } /** * Returns true if a task in t's thread-group other than t is doing an exec. */ static bool thread_group_in_exec(Task* t) { if (!t->session().is_recording()) { return false; } for (Task* tt : t->thread_group()->task_set()) { if (tt == t || t->already_exited()) { continue; } RecordTask* rt = static_cast<RecordTask*>(tt); Event& ev = rt->ev(); if (ev.is_syscall_event() && ev.Syscall().is_exec()) { return true; } } return false; } KernelMapIterator::KernelMapIterator(Task* t, bool* ok) : tid(t->tid) { // See https://lkml.org/lkml/2016/9/21/423 ASSERT(t, !thread_group_in_exec(t)) << "Task-group in execve, so reading " "/proc/.../maps may trigger kernel " "deadlock!"; init(ok); } KernelMapIterator::~KernelMapIterator() { if (maps_file) { fclose(maps_file); } } void KernelMapIterator::init(bool* ok) { char maps_path[PATH_MAX]; sprintf(maps_path, "/proc/%d/maps", tid); if (ok) { *ok = true; } if (!(maps_file = fopen(maps_path, "r"))) { if (ok) { *ok = false; } else { FATAL() << "Failed to open " << maps_path; } } ++*this; } void KernelMapIterator::operator++() { char line[PATH_MAX * 2]; if (!fgets(line, sizeof(line), maps_file)) { fclose(maps_file); maps_file = nullptr; return; } uint64_t start, end, offset, inode; int dev_major, dev_minor; char flags[32]; int chars_scanned; int nparsed = sscanf(line, "%" SCNx64 "-%" SCNx64 " %31s %" SCNx64 " %x:%x %" SCNu64 " %n", &start, &end, flags, &offset, &dev_major, &dev_minor, &inode, &chars_scanned); DEBUG_ASSERT(8 /*number of info fields*/ == nparsed || 7 /*num fields if name is blank*/ == nparsed); // trim trailing newline, if any int last_char = strlen(line) - 1; if (line[last_char] == '\n') { line[last_char] = 0; } raw_line = line; const char* name = trim_leading_blanks(line + chars_scanned); #if defined(__i386__) if (start > numeric_limits<uint32_t>::max() || end > numeric_limits<uint32_t>::max() || strcmp(name, "[vsyscall]") == 0) { // We manually read the exe link here because // this helper is used to set // |t->vm()->exe_image()|, so we can't rely on // that being correct yet.
char proc_exe[PATH_MAX];
    char exe[PATH_MAX];
    snprintf(proc_exe, sizeof(proc_exe), "/proc/%d/exe", tid);
    ssize_t size = readlink(proc_exe, exe, sizeof(exe) - 1);
    if (size < 0) {
      FATAL() << "readlink failed";
    }
    // readlink() does not NUL-terminate, so do it ourselves before printing.
    exe[size] = 0;
    FATAL() << "Sorry, tracee " << tid << " has x86-64 image " << exe
            << " and that's not supported with a 32-bit rr.";
  }
#endif
  int prot = (strchr(flags, 'r') ? PROT_READ : 0) |
             (strchr(flags, 'w') ? PROT_WRITE : 0) |
             (strchr(flags, 'x') ? PROT_EXEC : 0);
  int f = (strchr(flags, 'p') ? MAP_PRIVATE : 0) |
          (strchr(flags, 's') ? MAP_SHARED : 0);

  string tmp_name;
  if (strchr(name, '\\')) {
    // Unescape any '\012' sequences
    while (*name) {
      if (strncmp(name, "\\012", 4) == 0) {
        tmp_name.push_back('\n');
        name += 4;
      } else {
        tmp_name.push_back(*name);
        ++name;
      }
    }
    name = tmp_name.c_str();
  }

  km = KernelMapping(start, end, name, MKDEV(dev_major, dev_minor), inode,
                     prot, f, offset);
}

static KernelMapping read_kernel_mapping(pid_t tid, remote_ptr<void> addr) {
  MemoryRange range(addr, 1);
  bool ok;
  KernelMapIterator it(tid, &ok);
  if (!ok) {
    return KernelMapping();
  }
  for (; !it.at_end(); ++it) {
    const KernelMapping& km = it.current();
    if (km.contains(range)) {
      return km;
    }
  }
  return KernelMapping();
}

KernelMapping AddressSpace::read_kernel_mapping(Task* t,
                                                remote_ptr<void> addr) {
  return rr::read_kernel_mapping(t->tid, addr);
}

KernelMapping AddressSpace::read_local_kernel_mapping(uint8_t* addr) {
  return rr::read_kernel_mapping(getpid(), remote_ptr<void>((uintptr_t)addr));
}

/**
 * Cat the /proc/[t->tid]/maps file to stdout, line by line.
 */
void AddressSpace::print_process_maps(Task* t) {
  for (KernelMapIterator it(t); !it.at_end(); ++it) {
    string line;
    it.current(&line);
    cerr << line << '\n';
  }
}

AddressSpace::Mapping::Mapping(const KernelMapping& map,
                               const KernelMapping& recorded_map,
                               EmuFile::shr_ptr emu_file,
                               std::unique_ptr<struct stat> mapped_file_stat,
                               void* local_addr,
                               shared_ptr<MonitoredSharedMemory>&& monitored)
    : map(map),
      recorded_map(recorded_map),
      emu_file(emu_file),
      mapped_file_stat(std::move(mapped_file_stat)),
      local_addr(static_cast<uint8_t*>(local_addr)),
      monitored_shared_memory(std::move(monitored)),
      flags(FLAG_NONE) {}

static unique_ptr<struct stat> clone_stat(
    const unique_ptr<struct stat>& other) {
  return other ? unique_ptr<struct stat>(new struct stat(*other)) : nullptr;
}

AddressSpace::Mapping::Mapping(const Mapping& other)
    : map(other.map),
      recorded_map(other.recorded_map),
      emu_file(other.emu_file),
      mapped_file_stat(clone_stat(other.mapped_file_stat)),
      local_addr(other.local_addr),
      monitored_shared_memory(other.monitored_shared_memory),
      flags(other.flags) {}

AddressSpace::Mapping::~Mapping() {}

AddressSpace::~AddressSpace() {
  for (auto& m : mem) {
    if (m.second.local_addr) {
      int ret = munmap(m.second.local_addr, m.second.map.size());
      if (ret < 0) {
        FATAL() << "Can't munmap";
      }
    }
  }
  session_->on_destroy(this);
}

static uint32_t find_offset_of_syscall_instruction_in(SupportedArch arch,
                                                      uint8_t* vdso_data,
                                                      size_t vdso_len) {
  auto instruction = syscall_instruction(arch);
  for (uint32_t i = 1; i < vdso_len - instruction.size(); ++i) {
    if (memcmp(vdso_data + i, instruction.data(), instruction.size()) == 0) {
      return i;
    }
  }
  return 0;
}

uint32_t AddressSpace::offset_to_syscall_in_vdso[SupportedArch_MAX + 1];

remote_code_ptr AddressSpace::find_syscall_instruction(Task* t) {
  SupportedArch arch = t->arch();
  // This assert passes even if --unmap-vdso is passed because this only ever
  // gets called at the start of process_execve before we unmap the vdso.
After // the rr page is mapped in, we use the syscall instructions contained therein ASSERT(t, has_vdso()) << "Kernel with vDSO disabled?"; if (!offset_to_syscall_in_vdso[arch]) { auto vdso_data = t->read_mem(vdso().start().cast(), vdso().size()); offset_to_syscall_in_vdso[arch] = find_offset_of_syscall_instruction_in( arch, vdso_data.data(), vdso_data.size()); ASSERT(t, offset_to_syscall_in_vdso[arch]) << "No syscall instruction found in VDSO"; } return remote_code_ptr( (vdso().start().cast() + offset_to_syscall_in_vdso[arch]) .as_int()); } void AddressSpace::map_rr_page(AutoRemoteSyscalls& remote) { int prot = PROT_EXEC | PROT_READ; int flags = MAP_PRIVATE | MAP_FIXED; string file_name; Task* t = remote.task(); SupportedArch arch = t->arch(); const char *fname = nullptr; switch (t->arch()) { case x86_64: case aarch64: fname = RRPAGE_LIB_FILENAME; break; case x86: #if defined(__x86_64__) fname = RRPAGE_LIB_FILENAME_32; #else fname = RRPAGE_LIB_FILENAME; #endif break; } string path = find_helper_library(fname); if (path.empty()) { FATAL() << "Failed to locate " << fname << "; needed by " << t->exe_path() << " (" << arch_name(t->arch()) << ")"; } path += fname; size_t offset_pages = t->session().is_recording() ? RRPAGE_RECORD_PAGE_OFFSET : RRPAGE_REPLAY_PAGE_OFFSET; size_t offset_bytes = offset_pages * PRELOAD_LIBRARY_PAGE_SIZE; { ScopedFd page(path.c_str(), O_RDONLY); ASSERT(t, page.is_open()) << "Failed to open rrpage library " << path; int child_fd = remote.infallible_send_fd_if_alive(page); if (child_fd >= 0) { if (t->session().is_recording()) { remote.infallible_mmap_syscall_if_alive(rr_page_start() - offset_bytes, offset_bytes, prot, flags, child_fd, 0); } remote.infallible_mmap_syscall_if_alive(rr_page_start(), PRELOAD_LIBRARY_PAGE_SIZE, prot, flags, child_fd, offset_bytes); struct stat fstat = t->stat_fd(child_fd); file_name = t->file_name_of_fd(child_fd); remote.infallible_close_syscall_if_alive(child_fd); map(t, rr_page_start(), PRELOAD_LIBRARY_PAGE_SIZE, prot, flags, offset_bytes, file_name, fstat.st_dev, fstat.st_ino); mapping_flags_of(rr_page_start()) = Mapping::IS_RR_PAGE; if (t->session().is_recording()) { map(t, rr_page_start() - offset_bytes, offset_bytes, prot, flags, 0, file_name, fstat.st_dev, fstat.st_ino); } } } if (t->session().is_recording()) { // brk() will not have been called yet so the brk area is empty. brk_start = brk_end = remote.infallible_syscall(syscall_number_for_brk(arch), 0); ASSERT(t, !brk_end.is_null()); } traced_syscall_ip_ = rr_page_syscall_entry_point( TRACED, UNPRIVILEGED, RECORDING_AND_REPLAY, t->arch()); privileged_traced_syscall_ip_ = rr_page_syscall_entry_point( TRACED, PRIVILEGED, RECORDING_AND_REPLAY, t->arch()); } void AddressSpace::unmap_all_but_rr_mappings(AutoRemoteSyscalls& remote, UnmapOptions options) { vector unmaps; for (const auto& m : maps()) { // Do not attempt to unmap [vsyscall] --- it doesn't work. 
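    // (The kernel maps [vsyscall] at a fixed address in every process and,
    // at least on common kernels, rejects munmap/mremap on it, so we simply
    // have to skip it here.)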
if (m.map.start() != AddressSpace::rr_page_start() &&
        m.map.start() != AddressSpace::preload_thread_locals_start() &&
        !m.map.is_vsyscall() &&
        (!options.exclude_vdso_vvar ||
         (!m.map.is_vdso() && !m.map.is_vvar()))) {
      unmaps.push_back(m.map);
    }
  }
  for (auto& m : unmaps) {
    remote.infallible_syscall(syscall_number_for_munmap(remote.task()->arch()),
                              m.start(), m.size());
    unmap(remote.task(), m.start(), m.size());
  }
}

/**
 * Must match generate_rr_page.py
 */
static const AddressSpace::SyscallType entry_points[] = {
  { AddressSpace::TRACED, AddressSpace::UNPRIVILEGED,
    AddressSpace::RECORDING_AND_REPLAY },
  { AddressSpace::TRACED, AddressSpace::PRIVILEGED,
    AddressSpace::RECORDING_AND_REPLAY },
  { AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
    AddressSpace::RECORDING_AND_REPLAY },
  { AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
    AddressSpace::REPLAY_ONLY },
  { AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
    AddressSpace::RECORDING_ONLY },
  { AddressSpace::UNTRACED, AddressSpace::PRIVILEGED,
    AddressSpace::RECORDING_AND_REPLAY },
  { AddressSpace::UNTRACED, AddressSpace::PRIVILEGED,
    AddressSpace::REPLAY_ONLY },
  { AddressSpace::UNTRACED, AddressSpace::PRIVILEGED,
    AddressSpace::RECORDING_ONLY },
  { AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
    AddressSpace::REPLAY_ASSIST },
};

static int rr_page_syscall_stub_size(SupportedArch arch) {
  int val = 0;
  switch (arch) {
    case x86:
    case x86_64:
      val = 3;
      break;
    case aarch64:
      val = 8;
      break;
    default:
      FATAL() << "Syscall stub size not defined for this architecture";
  }
  if (arch == NativeArch::arch()) {
    DEBUG_ASSERT(val == RR_PAGE_SYSCALL_STUB_SIZE);
  }
  return val;
}

static int rr_page_syscall_instruction_end(SupportedArch arch) {
  int val = 0;
  switch (arch) {
    case x86:
    case x86_64:
      val = 2;
      break;
    case aarch64:
      val = 4;
      break;
    default:
      FATAL() << "Syscall stub size not defined for this architecture";
  }
  if (arch == NativeArch::arch()) {
    DEBUG_ASSERT(val == RR_PAGE_SYSCALL_INSTRUCTION_END);
  }
  return val;
}

static remote_code_ptr entry_ip_from_index(SupportedArch arch, size_t i) {
  return remote_code_ptr(RR_PAGE_ADDR + rr_page_syscall_stub_size(arch) * i);
}

static remote_code_ptr exit_ip_from_index(SupportedArch arch, size_t i) {
  return remote_code_ptr(RR_PAGE_ADDR + rr_page_syscall_stub_size(arch) * i +
                         rr_page_syscall_instruction_end(arch));
}

remote_code_ptr AddressSpace::rr_page_syscall_exit_point(Traced traced,
                                                         Privileged privileged,
                                                         Enabled enabled,
                                                         SupportedArch arch) {
  for (auto& e : entry_points) {
    if (e.traced == traced && e.privileged == privileged &&
        e.enabled == enabled) {
      return exit_ip_from_index(arch, &e - entry_points);
    }
  }
  return nullptr;
}

remote_code_ptr AddressSpace::rr_page_syscall_entry_point(Traced traced,
                                                          Privileged privileged,
                                                          Enabled enabled,
                                                          SupportedArch arch) {
  for (auto& e : entry_points) {
    if (e.traced == traced && e.privileged == privileged &&
        e.enabled == enabled) {
      return entry_ip_from_index(arch, &e - entry_points);
    }
  }
  return nullptr;
}

const AddressSpace::SyscallType* AddressSpace::rr_page_syscall_from_exit_point(
    SupportedArch arch, remote_code_ptr ip) {
  for (size_t i = 0; i < array_length(entry_points); ++i) {
    if (exit_ip_from_index(arch, i) == ip) {
      return &entry_points[i];
    }
  }
  return nullptr;
}

const AddressSpace::SyscallType* AddressSpace::rr_page_syscall_from_entry_point(
    SupportedArch arch, remote_code_ptr ip) {
  for (size_t i = 0; i < array_length(entry_points); ++i) {
    if (entry_ip_from_index(arch, i) == ip) {
      return &entry_points[i];
    }
  }
  return nullptr;
}

vector<AddressSpace::SyscallType> AddressSpace::rr_page_syscalls() {
  vector<SyscallType> result;
  for
(auto& e : entry_points) { result.push_back(e); } return result; } void AddressSpace::save_auxv(Task* t) { saved_auxv_ = read_auxv(t); save_interpreter_base(t, saved_auxv()); } void AddressSpace::save_interpreter_base(Task* t, std::vector auxv) { saved_interpreter_base_ = read_interpreter_base(auxv); save_ld_path(t, saved_interpreter_base()); } void AddressSpace::save_ld_path(Task* t, remote_ptr interpreter_base) { saved_ld_path_ = read_ld_path(t, interpreter_base); } void AddressSpace::read_mm_map(Task* t, NativeArch::prctl_mm_map* map) { char buf[PATH_MAX+1024]; { string proc_stat = t->proc_stat_path(); ScopedFd fd(proc_stat.c_str(), O_RDONLY); memset(buf, 0, sizeof(buf)); int err = read_to_end(fd, 0, buf, sizeof(buf)-1); if (err < 0) { FATAL() << "Failed to read /proc//stat"; } } // The last close-paren indicates the end of the comm and the // start of the fixed-width area char* fixed = strrchr(buf, ')'); // We don't change /proc/pid/exe, since we're unlikely to have CAP_SYS_ADMIN map->exe_fd = -1; // auxv is restored separately map->auxv.val = 0; map->auxv_size = 0; // All of these fields of /proc/pid/stat, we don't use (currently) char state; pid_t ppid; pid_t pgrp; int session; int tty_nr; int tpgid; unsigned int flags; unsigned long minflt, cminflt, majflt, cmajflt, utime, stime; long cutime, cstime, priority, nice, num_threads, itrealvalue; unsigned long long starttime; unsigned long vsize; long rss; unsigned long rsslim, kstkesp, kstskip, signal; unsigned long blocked, sigignore, sigcatch, wchan, nswap, cnswap; int exit_signal, processor; unsigned int rt_priority, policy; unsigned long long delayacct_blkio_ticks; unsigned long guest_time; long cguest_time; int exit_code; // See the proc(5) man page for the correct scan codes for these size_t n = sscanf(fixed + 1, // state ppid pgrp session tty_nr tpgid " %c %d %d %d %d %d" // flags minflt cminflt majflt cmajflt utime stime cutime cstime " %u %lu %lu %lu %lu %lu %lu %ld %ld" // priority nice num_threads itrealvalue starttime vsize rss " %ld %ld %ld %ld %llu %lu %ld" // rsslim startcode endcode startstack kstkesp kstskip signal " %lu %lu %lu %lu %lu %lu %lu" // blocked sigignore sigcatch wchan nswap cnswap exit_signal " %lu %lu %lu %lu %lu %lu %d" // processor rt_priority policy delayacct_blkio_ticks guest_time cguest_time " %d %u %u %llu %lu %ld " // start_data end_data start_brk arg_start arg_end env_start env_end exit_code " %lu %lu %lu %lu %lu %lu %lu %d", &state, &ppid, &pgrp, &session, &tty_nr, &tpgid, &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime, &cutime, &cstime, &priority, &nice, &num_threads, &itrealvalue, &starttime, &vsize, &rss, &rsslim, (unsigned long *)&map->start_code, (unsigned long *)&map->end_code, (unsigned long *)&map->start_stack, &kstkesp, &kstskip, &signal, &blocked, &sigignore, &sigcatch, &wchan, &nswap, &cnswap, &exit_signal, &processor, &rt_priority, &policy, &delayacct_blkio_ticks, &guest_time, &cguest_time, (unsigned long *)&map->start_data, (unsigned long *)&map->end_data, (unsigned long *)&map->start_brk, (unsigned long *)&map->arg_start, (unsigned long *)&map->arg_end, (unsigned long *)&map->env_start, (unsigned long *)&map->env_end, &exit_code); ASSERT(t, n == 50); // Fill in brk end ASSERT(t, map->start_brk == this->brk_start.as_int()); map->brk = this->brk_end.as_int(); } void AddressSpace::post_exec_syscall(Task* t) { // First locate a syscall instruction we can use for remote syscalls. 
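  // Remote syscalls work by pointing the tracee's ip at a syscall
  // instruction and resuming it, so we can't issue any remote syscall ---
  // not even the mmap of the rr page --- until we know the address of such
  // an instruction. Right after exec, the vdso is the one mapping we can
  // rely on to contain one.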
traced_syscall_ip_ = find_syscall_instruction(t); privileged_traced_syscall_ip_ = nullptr; do_breakpoint_fault_addr_ = nullptr; stopping_breakpoint_table_ = nullptr; stopping_breakpoint_table_entry_size_ = 0; // Now remote syscalls work, we can open_mem_fd. t->open_mem_fd(); // Set up AutoRemoteSyscalls again now that the mem-fd is open. AutoRemoteSyscalls remote(t); // Now we can set up the "rr page" at its fixed address. This gives // us traced and untraced syscall instructions at known, fixed addresses. map_rr_page(remote); // Set up the preload_thread_locals shared area. t->session().create_shared_mmap(remote, PRELOAD_THREAD_LOCALS_SIZE, preload_thread_locals_start(), "preload_thread_locals"); mapping_flags_of(preload_thread_locals_start()) |= AddressSpace::Mapping::IS_THREAD_LOCALS; } void AddressSpace::brk(Task* t, remote_ptr addr, int prot) { LOG(debug) << "brk(" << addr << ")"; remote_ptr old_brk = ceil_page_size(brk_end); remote_ptr new_brk = ceil_page_size(addr); if (old_brk < new_brk) { map(t, old_brk, new_brk - old_brk, prot, MAP_ANONYMOUS | MAP_PRIVATE, 0, "[heap]"); } else { unmap(t, new_brk, old_brk - new_brk); } brk_end = addr; } static const char* stringify_flags(int flags) { switch (flags) { case AddressSpace::Mapping::FLAG_NONE: return ""; case AddressSpace::Mapping::IS_SYSCALLBUF: return " [syscallbuf]"; case AddressSpace::Mapping::IS_THREAD_LOCALS: return " [thread_locals]"; case AddressSpace::Mapping::IS_PATCH_STUBS: return " [patch_stubs]"; default: return "[unknown_flags]"; } } void AddressSpace::dump() const { fprintf(stderr, " (heap: %p-%p)\n", (void*)brk_start.as_int(), (void*)brk_end.as_int()); for (auto it = mem.begin(); it != mem.end(); ++it) { const KernelMapping& m = it->second.map; fprintf(stderr, "%s%s\n", m.str().c_str(), stringify_flags(it->second.flags)); } } SupportedArch AddressSpace::arch() const { return (*task_set().begin())->arch(); } BreakpointType AddressSpace::get_breakpoint_type_for_retired_insn( remote_code_ptr ip) { remote_code_ptr addr = ip.undo_executed_bkpt(arch()); return get_breakpoint_type_at_addr(addr); } BreakpointType AddressSpace::get_breakpoint_type_at_addr(remote_code_ptr addr) { auto it = breakpoints.find(addr); return it == breakpoints.end() ? 
BKPT_NONE : it->second.type(); } bool AddressSpace::is_exec_watchpoint(remote_code_ptr addr) { for (auto& kv : watchpoints) { if (kv.first.contains(addr.to_data_ptr()) && (kv.second.watched_bits() & EXEC_BIT)) { return true; } } return false; } bool AddressSpace::is_breakpoint_in_private_read_only_memory( remote_code_ptr addr) { for (const auto& m : maps_containing_or_after(addr.to_data_ptr())) { if (m.map.start() >= addr.increment_by_bkpt_insn_length(arch()).to_data_ptr()) { break; } if ((m.map.prot() & PROT_WRITE) || (m.map.flags() & MAP_SHARED)) { return false; } } return true; } void AddressSpace::replace_breakpoints_with_original_values( uint8_t* dest, size_t length, remote_ptr addr) { for (auto& it : breakpoints) { remote_ptr bkpt_location = it.first.to_data_ptr(); remote_ptr start = max(addr, bkpt_location); remote_ptr end = min(addr + length, bkpt_location + bkpt_instruction_length(arch())); if (start < end) { memcpy(dest + (start - addr), it.second.original_data() + (start - bkpt_location), end - start); } } } bool AddressSpace::is_breakpoint_instruction(Task* t, remote_code_ptr ip) { bool ok = true; uint8_t data[MAX_BKPT_INSTRUCTION_LENGTH]; t->read_bytes_helper(ip.to_data_ptr(), bkpt_instruction_length(t->arch()), data, &ok); return memcmp(data, breakpoint_insn(t->arch()), bkpt_instruction_length(t->arch())) == 0 && ok; } static void remove_range(set& ranges, const MemoryRange& range) { if (ranges.empty()) { return; } auto start = ranges.lower_bound(range); // An earlier range might extend into range, so check for that. if (start != ranges.begin()) { --start; if (start->end() <= range.start()) { ++start; } } auto end = start; auto prev_end = start; while (end != ranges.end() && end->start() < range.end()) { prev_end = end; ++end; } if (start == end) { return; } MemoryRange start_range = *start; MemoryRange end_range = *prev_end; ranges.erase(start, end); if (start_range.start() < range.start()) { ranges.insert(MemoryRange(start_range.start(), range.start())); } if (range.end() < end_range.end()) { ranges.insert(MemoryRange(range.end(), end_range.end())); } } static void add_range(set& ranges, const MemoryRange& range) { // Remove overlapping ranges remove_range(ranges, range); ranges.insert(range); // We could coalesce adjacent ranges, but there's probably no need. } KernelMapping AddressSpace::map(Task* t, remote_ptr addr, size_t num_bytes, int prot, int flags, off64_t offset_bytes, const string& fsname, dev_t device, ino_t inode, unique_ptr mapped_file_stat, const KernelMapping* recorded_map, EmuFile::shr_ptr emu_file, void* local_addr, shared_ptr monitored) { LOG(debug) << "mmap(" << addr << ", " << num_bytes << ", " << HEX(prot) << ", " << HEX(flags) << ", " << HEX(offset_bytes) << ")"; num_bytes = ceil_page_size(num_bytes); KernelMapping m(addr, addr + num_bytes, fsname, device, inode, prot, flags, offset_bytes); if (!num_bytes) { return m; } remove_range(dont_fork, MemoryRange(addr, num_bytes)); remove_range(wipe_on_fork, MemoryRange(addr, num_bytes)); // The mmap() man page doesn't specifically describe // what should happen if an existing map is // "overwritten" by a new map (of the same resource). // In testing, the behavior seems to be as if the // overlapping region is unmapped and then remapped // per the arguments to the second call. unmap_internal(t, addr, num_bytes); const KernelMapping& actual_recorded_map = recorded_map ? 
*recorded_map : m;
  map_and_coalesce(t, m, actual_recorded_map, emu_file,
                   std::move(mapped_file_stat), std::move(local_addr),
                   std::move(monitored));

  // During an emulated exec, we will explicitly map in a (copy of) the VDSO
  // at the recorded address.
  if (actual_recorded_map.is_vdso()) {
    vdso_start_addr = addr;
  }
  return m;
}

template <typename Arch> void AddressSpace::at_preload_init_arch(Task* t) {
  auto params = t->read_mem(
      remote_ptr<rrcall_init_preload_params<Arch>>(t->regs().orig_arg1()));

  if (t->session().is_recording()) {
    ASSERT(t, t->session().as_record()->use_syscall_buffer() ==
                  params.syscallbuf_enabled)
        << "Tracee thinks syscallbuf is "
        << (params.syscallbuf_enabled ? "en" : "dis")
        << "abled, but tracer thinks "
        << (t->session().as_record()->use_syscall_buffer() ? "en" : "dis")
        << "abled";
  } else {
    if (params.breakpoint_table_entry_size == -1) {
      do_breakpoint_fault_addr_ = params.breakpoint_instr_addr.rptr().as_int();
    } else {
      stopping_breakpoint_table_ = params.breakpoint_table.rptr().as_int();
      stopping_breakpoint_table_entry_size_ =
          params.breakpoint_table_entry_size;
    }
  }

  if (!params.syscallbuf_enabled) {
    return;
  }

  syscallbuf_enabled_ = true;

  if (t->session().is_recording()) {
    monkeypatch_state->patch_at_preload_init(static_cast<RecordTask*>(t));
  }
}

void AddressSpace::at_preload_init(Task* t) {
  RR_ARCH_FUNCTION(at_preload_init_arch, t->arch(), t);
}

const AddressSpace::Mapping& AddressSpace::mapping_of(
    remote_ptr<void> addr) const {
  MemoryRange range(floor_page_size(addr), 1);
  auto it = mem.find(range);
  DEBUG_ASSERT(it != mem.end());
  DEBUG_ASSERT(it->second.map.contains(range));
  return it->second;
}

uint32_t& AddressSpace::mapping_flags_of(remote_ptr<void> addr) {
  return const_cast<Mapping&>(
             static_cast<const AddressSpace*>(this)->mapping_of(addr))
      .flags;
}

uint8_t* AddressSpace::local_mapping(remote_ptr<void> addr, size_t size) {
  MemoryRange range(floor_page_size(addr), 1);
  auto it = mem.find(range);
  if (it == mem.end()) {
    return nullptr;
  }
  DEBUG_ASSERT(it->second.map.contains(range));
  const Mapping& map = it->second;
  // Fall back to the slow path if we can't get the entire region
  if (size > static_cast<size_t>(map.map.end() - addr)) {
    return nullptr;
  }
  if (map.local_addr != nullptr) {
    size_t offset = addr - map.map.start();
    return static_cast<uint8_t*>(map.local_addr) + offset;
  }
  return nullptr;
}

void* AddressSpace::detach_local_mapping(remote_ptr<void> addr) {
  auto& m = const_cast<Mapping&>(mapping_of(addr));
  void* p = m.local_addr;
  m.local_addr = nullptr;
  return p;
}

bool AddressSpace::has_mapping(remote_ptr<void> addr) const {
  if (addr + page_size() < addr) {
    // Assume the last byte in the address space is never mapped; avoid overflow
    return false;
  }
  MemoryRange m(floor_page_size(addr), 1);
  auto it = mem.find(m);
  return it != mem.end() && it->first.contains(m);
}

bool AddressSpace::has_rr_page() const {
  MemoryRange m(RR_PAGE_ADDR, 1);
  auto it = mem.find(m);
  return it != mem.end() && (it->second.flags & Mapping::IS_RR_PAGE);
}

void AddressSpace::protect(Task* t, remote_ptr<void> addr, size_t num_bytes,
                           int prot) {
  LOG(debug) << "mprotect(" << addr << ", " << num_bytes << ", " << HEX(prot)
             << ")";

  MemoryRange last_overlap;
  auto protector = [this, prot, &last_overlap](Mapping m, MemoryRange rem) {
    LOG(debug) << "  protecting (" << rem << ") ...";
    remove_from_map(m.map);

    // PROT_GROWSDOWN means that if this is a grows-down segment
    // (which for us means "stack") then the change should be
    // extended to the start of the segment.
    // We don't try to handle the analogous PROT_GROWSUP, because we
    // don't understand the idea of a grows-up segment.
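    // Hypothetical example: for a MAP_GROWSDOWN stack segment spanning
    // [0x7f0000, 0x800000) and mprotect(0x7f8000, 0x4000,
    // PROT_READ|PROT_GROWSDOWN), the protection change is applied to
    // [0x7f0000, 0x7fc000), i.e. extended down to the segment start.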
remote_ptr<void> new_start;
    if ((m.map.start() < rem.start()) && (prot & PROT_GROWSDOWN)) {
      new_start = m.map.start();
      LOG(debug) << "  PROT_GROWSDOWN: expanded region down to " << new_start;
    } else {
      new_start = rem.start();
    }
    LOG(debug) << "  erased (" << m.map << ")";

    // If the first segment we protect underflows the
    // region, remap the underflow region with previous
    // prot.
    auto monitored = m.monitored_shared_memory;
    if (m.map.start() < new_start) {
      Mapping underflow(
          m.map.subrange(m.map.start(), rem.start()),
          m.recorded_map.subrange(m.recorded_map.start(), rem.start()),
          m.emu_file, clone_stat(m.mapped_file_stat), m.local_addr,
          std::move(monitored));
      underflow.flags = m.flags;
      add_to_map(underflow);
    }
    // Remap the overlapping region with the new prot.
    remote_ptr<void> new_end = min(rem.end(), m.map.end());
    int new_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
    Mapping overlap(
        m.map.subrange(new_start, new_end).set_prot(new_prot),
        m.recorded_map.subrange(new_start, new_end).set_prot(new_prot),
        m.emu_file, clone_stat(m.mapped_file_stat),
        m.local_addr ? m.local_addr + (new_start - m.map.start()) : 0,
        m.monitored_shared_memory
            ? m.monitored_shared_memory->subrange(new_start - m.map.start(),
                                                  new_end - new_start)
            : nullptr);
    overlap.flags = m.flags;
    add_to_map(overlap);
    last_overlap = overlap.map;

    // If the last segment we protect overflows the
    // region, remap the overflow region with previous
    // prot.
    if (rem.end() < m.map.end()) {
      Mapping overflow(
          m.map.subrange(rem.end(), m.map.end()),
          m.recorded_map.subrange(rem.end(), m.map.end()), m.emu_file,
          clone_stat(m.mapped_file_stat),
          m.local_addr ? m.local_addr + (rem.end() - m.map.start()) : 0,
          m.monitored_shared_memory
              ? m.monitored_shared_memory->subrange(rem.end() - m.map.start(),
                                                    m.map.end() - rem.end())
              : nullptr);
      overflow.flags = m.flags;
      add_to_map(overflow);
    }
  };
  for_each_in_range(addr, num_bytes, protector, ITERATE_CONTIGUOUS);
  if (last_overlap.size()) {
    // All mappings that we altered which might need coalescing
    // are adjacent to |last_overlap|.
    coalesce_around(t, mem.find(last_overlap));
  }
}

void AddressSpace::fixup_mprotect_growsdown_parameters(Task* t) {
  ASSERT(t, !(t->regs().arg3() & PROT_GROWSUP));
  if (t->regs().arg3() & PROT_GROWSDOWN) {
    Registers r = t->regs();
    if (r.arg1() == floor_page_size(r.arg1()) && has_mapping(r.arg1())) {
      auto& km = mapping_of(r.arg1()).map;
      if (km.flags() & MAP_GROWSDOWN) {
        auto new_start = km.start();
        r.set_arg2(remote_ptr<void>(r.arg1()) + size_t(r.arg2()) - new_start);
        r.set_arg1(new_start);
        r.set_arg3(r.arg3() & ~PROT_GROWSDOWN);
        t->set_regs(r);
      }
    }
  }
}

void AddressSpace::remap(Task* t, remote_ptr<void> old_addr,
                         size_t old_num_bytes, remote_ptr<void> new_addr,
                         size_t new_num_bytes, int flags) {
  LOG(debug) << "mremap(" << old_addr << ", " << old_num_bytes << ", "
             << new_addr << ", " << new_num_bytes << ")";
  old_num_bytes = ceil_page_size(old_num_bytes);

  Mapping mr = mapping_of(old_addr);
  DEBUG_ASSERT(!mr.monitored_shared_memory);
  KernelMapping km =
      mr.map.subrange(old_addr, min(mr.map.end(), old_addr + old_num_bytes));

  unmap_internal(t, old_addr, old_num_bytes);
  if (flags & MREMAP_DONTUNMAP) {
    // This can only ever be an anonymous private mapping.
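    // With MREMAP_DONTUNMAP the kernel moves the pages to the new location
    // but leaves the old range mapped (typically so userfaultfd can service
    // faults on it); what remains behind behaves like a fresh anonymous
    // private mapping, which is what we register here.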
map(t, old_addr, old_num_bytes, km.prot(), km.flags(), 0, string()); } if (0 == new_num_bytes) { return; } new_num_bytes = ceil_page_size(new_num_bytes); auto it = dont_fork.lower_bound(MemoryRange(old_addr, old_num_bytes)); if (it != dont_fork.end() && it->start() < old_addr + old_num_bytes) { // mremap fails if some but not all pages are marked DONTFORK DEBUG_ASSERT(*it == MemoryRange(old_addr, old_num_bytes)); remove_range(dont_fork, MemoryRange(old_addr, old_num_bytes)); add_range(dont_fork, MemoryRange(new_addr, new_num_bytes)); } else { remove_range(dont_fork, MemoryRange(old_addr, old_num_bytes)); remove_range(dont_fork, MemoryRange(new_addr, new_num_bytes)); } it = wipe_on_fork.lower_bound(MemoryRange(old_addr, old_num_bytes)); if (it != wipe_on_fork.end() && it->start() < old_addr + old_num_bytes) { // hopefully mremap fails if some but not all pages are marked DONTFORK DEBUG_ASSERT(*it == MemoryRange(old_addr, old_num_bytes)); remove_range(wipe_on_fork, MemoryRange(old_addr, old_num_bytes)); add_range(wipe_on_fork, MemoryRange(new_addr, new_num_bytes)); } else { remove_range(wipe_on_fork, MemoryRange(old_addr, old_num_bytes)); remove_range(wipe_on_fork, MemoryRange(new_addr, new_num_bytes)); } unmap_internal(t, new_addr, new_num_bytes); remote_ptr new_end = new_addr + new_num_bytes; map_and_coalesce(t, km.set_range(new_addr, new_end), mr.recorded_map.set_range(new_addr, new_end), mr.emu_file, clone_stat(mr.mapped_file_stat), nullptr, nullptr); } void AddressSpace::remove_breakpoint(remote_code_ptr addr, BreakpointType type) { auto it = breakpoints.find(addr); if (it == breakpoints.end() || it->second.unref(type) > 0) { return; } destroy_breakpoint(it); } bool AddressSpace::add_breakpoint(remote_code_ptr addr, BreakpointType type) { auto it = breakpoints.find(addr); if (it == breakpoints.end()) { uint8_t overwritten_data[MAX_BKPT_INSTRUCTION_LENGTH]; ssize_t bkpt_size = bkpt_instruction_length(arch()); // Grab a random task from the VM so we can use its // read/write_mem() helpers. 
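    // Software breakpoints are just the architecture's trap instruction
    // (int3 on x86, brk #0 on aarch64) patched over the original code. We
    // stash the overwritten byte(s) so that reads of this memory can be
    // fixed up to show the original data, and so the instruction can be
    // restored when the breakpoint is removed.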
Task* t = first_running_task(); if (!t || bkpt_size != t->read_bytes_fallible(addr.to_data_ptr(), bkpt_size, overwritten_data)) { return false; } t->write_bytes_helper(addr.to_data_ptr(), bkpt_size, breakpoint_insn(arch()), nullptr, Task::IS_BREAKPOINT_RELATED); auto it_and_is_new = breakpoints.insert(make_pair(addr, Breakpoint())); DEBUG_ASSERT(it_and_is_new.second); memcpy(it_and_is_new.first->second.overwritten_data, overwritten_data, sizeof(overwritten_data)); it = it_and_is_new.first; } it->second.ref(type); return true; } void AddressSpace::remove_all_breakpoints() { while (!breakpoints.empty()) { destroy_breakpoint(breakpoints.begin()); } } void AddressSpace::suspend_breakpoint_at(remote_code_ptr addr) { auto it = breakpoints.find(addr); if (it != breakpoints.end()) { Task* t = first_running_task(); if (t) { t->write_bytes_helper(addr.to_data_ptr(), bkpt_instruction_length(arch()), it->second.overwritten_data); } } } void AddressSpace::restore_breakpoint_at(remote_code_ptr addr) { auto it = breakpoints.find(addr); if (it != breakpoints.end()) { Task* t = first_running_task(); if (t) { t->write_bytes_helper(addr.to_data_ptr(), bkpt_instruction_length(arch()), breakpoint_insn(arch())); } } } int AddressSpace::access_bits_of(WatchType type) { switch (type) { case WATCH_EXEC: return EXEC_BIT; case WATCH_WRITE: return WRITE_BIT; case WATCH_READWRITE: return READ_BIT | WRITE_BIT; default: FATAL() << "Unknown watchpoint type " << type; return 0; // not reached } } /** * We do not allow a watchpoint to watch the last byte of memory addressable * by rr. This avoids constructing a MemoryRange that wraps around. * For 64-bit builds this is no problem because addresses at the top of memory * are in kernel space. For 32-bit builds it seems impossible to map the last * page of memory in Linux so we should be OK there too. * Note that zero-length watchpoints are OK. configure_watch_registers just * ignores them. 
*/
static MemoryRange range_for_watchpoint(remote_ptr<void> addr,
                                        size_t num_bytes) {
  uintptr_t p = addr.as_int();
  uintptr_t max_len = UINTPTR_MAX - p;
  return MemoryRange(addr, min(num_bytes, max_len));
}

void AddressSpace::remove_watchpoint(remote_ptr<void> addr, size_t num_bytes,
                                     WatchType type) {
  auto it = watchpoints.find(range_for_watchpoint(addr, num_bytes));
  if (it != watchpoints.end() &&
      0 == it->second.unwatch(access_bits_of(type))) {
    watchpoints.erase(it);
  }
  allocate_watchpoints();
}

bool AddressSpace::add_watchpoint(remote_ptr<void> addr, size_t num_bytes,
                                  WatchType type) {
  MemoryRange key = range_for_watchpoint(addr, num_bytes);
  auto it = watchpoints.find(key);
  if (it == watchpoints.end()) {
    auto it_and_is_new =
        watchpoints.insert(make_pair(key, Watchpoint(num_bytes)));
    DEBUG_ASSERT(it_and_is_new.second);
    it = it_and_is_new.first;
    update_watchpoint_value(it->first, it->second);
  }
  it->second.watch(access_bits_of(type));
  return allocate_watchpoints();
}

void AddressSpace::save_watchpoints() {
  saved_watchpoints.push_back(watchpoints);
}

bool AddressSpace::restore_watchpoints() {
  DEBUG_ASSERT(!saved_watchpoints.empty());
  watchpoints = saved_watchpoints[saved_watchpoints.size() - 1];
  saved_watchpoints.pop_back();
  return allocate_watchpoints();
}

bool AddressSpace::update_watchpoint_value(const MemoryRange& range,
                                           Watchpoint& watchpoint) {
  Task* t = first_running_task();
  if (!t) {
    return false;
  }
  bool valid = true;
  vector<uint8_t> value_bytes = watchpoint.value_bytes;
  for (size_t i = 0; i < value_bytes.size(); ++i) {
    value_bytes[i] = 0xFF;
  }
  remote_ptr<void> addr = range.start();
  size_t num_bytes = range.size();
  while (num_bytes > 0) {
    ssize_t bytes_read = t->read_bytes_fallible(
        addr, num_bytes, value_bytes.data() + (addr - range.start()));
    if (bytes_read <= 0) {
      valid = false;
      // advance to next page and try to read more. We want to know
      // when the valid part of a partially invalid watchpoint changes.
      bytes_read =
          min<size_t>(num_bytes, (floor_page_size(addr) + page_size()) - addr);
    }
    addr += bytes_read;
    num_bytes -= bytes_read;
  }

  bool changed = valid != watchpoint.valid ||
                 memcmp(value_bytes.data(), watchpoint.value_bytes.data(),
                        value_bytes.size()) != 0;
  watchpoint.valid = valid;
  watchpoint.value_bytes = value_bytes;
  return changed;
}

void AddressSpace::update_watchpoint_values(remote_ptr<void> start,
                                            remote_ptr<void> end) {
  MemoryRange r(start, end);
  for (auto& it : watchpoints) {
    if (it.first.intersects(r) &&
        update_watchpoint_value(it.first, it.second)) {
      it.second.changed = true;
      // We do nothing to track kernel reads of read-write watchpoints...
    }
  }
}

static int DR_WATCHPOINT(int n) { return 1 << n; }

static bool watchpoint_triggered(uintptr_t debug_status,
                                 const vector<int8_t>& regs) {
  for (auto reg : regs) {
    if (debug_status & DR_WATCHPOINT(reg)) {
      return true;
    }
  }
  return false;
}

bool AddressSpace::notify_watchpoint_fired(uintptr_t debug_status,
                                           remote_ptr<void> hit_addr,
    remote_code_ptr address_of_singlestep_start) {
  bool triggered = false;
  for (auto& it : watchpoints) {
    // On Skylake/4.14.13-300.fc27.x86_64 at least, we have observed a
    // situation where singlestepping through the instruction before a hardware
    // execution watchpoint causes singlestep completion *and* also reports the
    // hardware execution watchpoint being triggered. The latter is incorrect.
    // This could be a HW issue or a kernel issue. Work around it by ignoring
    // triggered watchpoints that aren't on the instruction we just tried to
    // execute.
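    // Three detection strategies are combined below: write watchpoints are
    // confirmed by re-reading the watched bytes and checking that the value
    // actually changed; on x86 the debug-status register tells us which
    // debug registers fired; on aarch64 the kernel instead reports a
    // faulting address, which we match against the watched range.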
bool write_triggered = (it.second.watched_bits() & WRITE_BIT) && update_watchpoint_value(it.first, it.second); // Depending on the architecture the hardware may indicate hit watchpoints // either by number, or by the address that triggered the watchpoint hit // - support either. bool read_triggered = false; bool exec_triggered = false; bool watchpoint_in_range = false; if (is_x86ish(arch())) { read_triggered = (it.second.watched_bits() & READ_BIT) && watchpoint_triggered(debug_status, it.second.debug_regs_for_exec_read); exec_triggered = (it.second.watched_bits() & EXEC_BIT) && (address_of_singlestep_start.is_null() || it.first.start() == address_of_singlestep_start.to_data_ptr()) && watchpoint_triggered(debug_status, it.second.debug_regs_for_exec_read); } else { // The reported address may not match our watchpoint exactly. // The ARM manual says: // The address recorded is within an address range of the size defined by the // DCZID_EL0.BS field. The start of the range is aligned to the size defined // by the DCZID_EL0.BS field and its end is not greater than the address that // triggered the watchpoint. // So we construct a range spanning the whole block, then test that the range // intersects a watchpoint range *and* that hit_addr is not past the first byte // of the watched region. auto block_size = dczid_el0_block_size(); auto slop = hit_addr.as_int() % block_size; auto hit_range = MemoryRange(hit_addr - slop, block_size); watchpoint_in_range = it.first.intersects(hit_range) && it.first.start() >= hit_addr; } if (write_triggered || read_triggered || exec_triggered || watchpoint_in_range) { it.second.changed = true; triggered = true; } } return triggered; } void AddressSpace::notify_written(remote_ptr addr, size_t num_bytes, uint32_t flags) { if (!(flags & Task::IS_BREAKPOINT_RELATED)) { update_watchpoint_values(addr, addr + num_bytes); } session()->accumulate_bytes_written(num_bytes); } void AddressSpace::remove_all_watchpoints() { watchpoints.clear(); allocate_watchpoints(); } void AddressSpace::unmap(Task* t, remote_ptr addr, ssize_t num_bytes) { LOG(debug) << "munmap(" << addr << ", " << num_bytes << ")"; num_bytes = ceil_page_size(num_bytes); if (!num_bytes) { return; } remove_range(dont_fork, MemoryRange(addr, num_bytes)); remove_range(wipe_on_fork, MemoryRange(addr, num_bytes)); return unmap_internal(t, addr, num_bytes); } void AddressSpace::unmap_internal(Task*, remote_ptr addr, ssize_t num_bytes) { LOG(debug) << "munmap(" << addr << ", " << num_bytes << ")"; auto unmapper = [this](Mapping m, MemoryRange rem) { LOG(debug) << " unmapping (" << rem << ") ..."; remove_from_map(m.map); LOG(debug) << " erased (" << m.map << ") ..."; // If the first segment we unmap underflows the unmap // region, remap the underflow region. auto monitored = m.monitored_shared_memory; if (m.map.start() < rem.start()) { Mapping underflow(m.map.subrange(m.map.start(), rem.start()), m.recorded_map.subrange(m.map.start(), rem.start()), m.emu_file, clone_stat(m.mapped_file_stat), m.local_addr, std::move(monitored)); underflow.flags = m.flags; add_to_map(underflow); } // If the last segment we unmap overflows the unmap // region, remap the overflow region. if (rem.end() < m.map.end()) { Mapping overflow( m.map.subrange(rem.end(), m.map.end()), m.recorded_map.subrange(rem.end(), m.map.end()), m.emu_file, clone_stat(m.mapped_file_stat), m.local_addr ? m.local_addr + (rem.end() - m.map.start()) : 0, m.monitored_shared_memory ? 
m.monitored_shared_memory->subrange(rem.end() - m.map.start(), m.map.end() - rem.end()) : nullptr); overflow.flags = m.flags; add_to_map(overflow); } if (m.local_addr) { auto addr = m.local_addr + (rem.start() - m.map.start()); auto size = std::min(rem.size(), m.map.size() - (rem.start() - m.map.start())); int ret = munmap(addr, size); if (ret < 0) { FATAL() << "Can't munmap"; } } }; for_each_in_range(addr, num_bytes, unmapper); update_watchpoint_values(addr, addr + num_bytes); } void AddressSpace::advise(Task*, remote_ptr addr, ssize_t num_bytes, int advice) { LOG(debug) << "madvise(" << addr << ", " << num_bytes << ", " << advice << ")"; num_bytes = ceil_page_size(num_bytes); switch (advice) { case MADV_DONTFORK: add_range(dont_fork, MemoryRange(addr, num_bytes)); break; case MADV_DOFORK: remove_range(dont_fork, MemoryRange(addr, num_bytes)); break; case MADV_WIPEONFORK: add_range(wipe_on_fork, MemoryRange(addr, num_bytes)); break; case MADV_KEEPONFORK: remove_range(wipe_on_fork, MemoryRange(addr, num_bytes)); break; default: break; } } void AddressSpace::did_fork_into(Task* t) { // MADV_WIPEONFORK is inherited across fork and cleared on exec. // We'll copy it here, then do the `dont_fork` unmappings, and then // whatever survives in the new AddressSpace's wipe_on_fork gets wiped. t->vm()->wipe_on_fork = wipe_on_fork; for (auto& range : dont_fork) { // During recording we execute MADV_DONTFORK so the forked child will // have had its dontfork areas unmapped by the kernel already if (!t->session().is_recording()) { AutoRemoteSyscalls remote(t); remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), range.start(), range.size()); } t->vm()->unmap(t, range.start(), range.size()); } // Any ranges that were dropped were unmapped (and thus removed from // wipe_on_fork), so now we can record anything that's left. for (auto& range : t->vm()->wipe_on_fork) { if (t->session().is_recording()) { // Record that these mappings were wiped. RecordTask* rt = static_cast(t); rt->record_remote(range); } } } static string strip_deleted(const string& s) { static const char deleted[] = " (deleted)"; ssize_t find_deleted = s.size() - (sizeof(deleted) - 1); if (s.find(deleted) == size_t(find_deleted)) { return s.substr(0, find_deleted); } return s; } string KernelMapping::fsname_strip_deleted() const { return strip_deleted(fsname_); } enum HandleHeap { TREAT_HEAP_AS_ANONYMOUS, RESPECT_HEAP }; static bool normalized_file_names_equal(const KernelMapping& km1, const KernelMapping& km2, HandleHeap handle_heap) { if (km1.is_stack() || km2.is_stack()) { // The kernel seems to use "[stack:]" for any mapping area containing // thread |tid|'s stack pointer. When the thread exits, the next read of // the maps doesn't treat the area as stack at all. We don't want to track // thread exits, so if one of the mappings is a stack, skip the name // comparison. Device and inode numbers will still be checked. return true; } if (handle_heap == TREAT_HEAP_AS_ANONYMOUS && (km1.is_heap() || km2.is_heap())) { // The kernel's heuristics for treating an anonymous mapping as "[heap]" // are obscure. Just skip the name check. Device and inode numbers will // still be checked. return true; } // We don't track when a file gets deleted, so it's possible for the kernel // to have " (deleted)" when we don't. 
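  // E.g. the kernel may report "/tmp/foo (deleted)" while our cached mapping
  // still says "/tmp/foo"; stripping the suffix from both sides makes the
  // comparison insensitive to that.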
return strip_deleted(km1.fsname()) == strip_deleted(km2.fsname()); } /** * Return true iff |left| and |right| are located adjacently in memory * with the same metadata, and map adjacent locations of the same * underlying (real) device. */ static bool is_adjacent_mapping(const KernelMapping& mleft, const KernelMapping& mright, HandleHeap handle_heap, int32_t flags_to_check = 0xFFFFFFFF) { if (mleft.end() != mright.start()) { return false; } if (((mleft.flags() ^ mright.flags()) & flags_to_check) || mleft.prot() != mright.prot()) { return false; } if (!normalized_file_names_equal(mleft, mright, handle_heap)) { return false; } if (mleft.device() != mright.device() || mleft.inode() != mright.inode()) { return false; } if (mleft.is_real_device() && mleft.file_offset_bytes() + off64_t(mleft.size()) != mright.file_offset_bytes()) { return false; } return true; } /** * If |*left_m| and |right_m| are adjacent (see * |is_adjacent_mapping()|), write a merged segment descriptor to * |*left_m| and return true. Otherwise return false. */ static bool try_merge_adjacent(KernelMapping* left_m, const KernelMapping& right_m) { if (is_adjacent_mapping(*left_m, right_m, TREAT_HEAP_AS_ANONYMOUS, KernelMapping::checkable_flags_mask)) { *left_m = KernelMapping(left_m->start(), right_m.end(), left_m->fsname(), left_m->device(), left_m->inode(), right_m.prot(), right_m.flags(), left_m->file_offset_bytes()); return true; } return false; } static dev_t normalized_device_number(const KernelMapping& m) { if (m.fsname().c_str()[0] != '/') { return m.device(); } // btrfs files can report the wrong device number in /proc//maps, so // restrict ourselves to checking whether the device number is != 0 if (m.device() != KernelMapping::NO_DEVICE) { return (dev_t)-1; } return m.device(); } static void assert_segments_match(Task* t, const KernelMapping& input_m, const KernelMapping& km) { KernelMapping m = input_m; string err; if (m.start() != km.start()) { err = "starts differ"; } else if (m.end() != km.end()) { err = "ends differ"; } else if (m.prot() != km.prot()) { err = "prots differ"; } else if ((m.flags() ^ km.flags()) & KernelMapping::checkable_flags_mask) { err = "flags differ"; } else if (!normalized_file_names_equal(m, km, TREAT_HEAP_AS_ANONYMOUS) && !(km.is_heap() && m.fsname() == "") && !(m.is_heap() && km.fsname() == "") && !km.is_vdso()) { // Due to emulated exec, the kernel may identify any of our anonymous maps // as [heap] (or not). // Kernels before 3.16 have a bug where any mapping at the original VDSO // address is marked [vdso] even if the VDSO was unmapped and replaced by // something else, so if the kernel reports [vdso] it may be spurious and // we skip this check. See kernel commit // a62c34bd2a8a3f159945becd57401e478818d51c. err = "filenames differ"; } else if (normalized_device_number(m) != normalized_device_number(km)) { err = "devices_differ"; } else if (m.inode() != km.inode()) { err = "inodes differ"; } if (err.size()) { cerr << "cached mmap:" << endl; t->vm()->dump(); cerr << "/proc/" << t->tid << "/mmaps:" << endl; AddressSpace::print_process_maps(t); ASSERT(t, false) << "\nCached mapping " << m << " should be " << km << "; " << err; } } void AddressSpace::ensure_replay_matches_single_recorded_mapping(Task* t, MemoryRange range) { // The only case where we eagerly coalesced during recording but not replay should // be where we mapped private memory beyond-end-of-file. // Don't do an actual coalescing check here; we rely on the caller to tell us // the range to coalesce. 
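  // The scenario, per the comment above: during recording a private file
  // mapping may extend past end-of-file and get recorded as one mapping,
  // while at replay the pages past end-of-file have to be materialized
  // separately. The same range can then be covered by several adjacent
  // mappings, which we stitch back into one below, turning any
  // direct-mapped pieces into anonymous memory.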
ASSERT(t, range.start() == floor_page_size(range.start())); ASSERT(t, range.end() == ceil_page_size(range.end())); auto fixer = [this, t, range](Mapping mapping, MemoryRange) { if (mapping.map == range) { // Existing single mapping covers entire range; nothing to do. return; } // These should be null during replay ASSERT(t, !mapping.mapped_file_stat); // These should not be in use for a beyond-end-of-file mapping ASSERT(t, !mapping.local_addr); // The mapping should be private ASSERT(t, mapping.map.flags() & MAP_PRIVATE); ASSERT(t, !mapping.emu_file); ASSERT(t, !mapping.monitored_shared_memory); // Flagged mappings shouldn't be coalescable ever ASSERT(t, !mapping.flags); if (!(mapping.map.flags() & MAP_ANONYMOUS)) { // Direct-mapped piece. Turn it into an anonymous mapping. vector buffer; buffer.resize(mapping.map.size()); t->read_bytes_helper(mapping.map.start(), buffer.size(), buffer.data()); { AutoRemoteSyscalls remote(t); remote.infallible_mmap_syscall_if_alive(mapping.map.start(), buffer.size(), mapping.map.prot(), mapping.map.flags() | MAP_ANONYMOUS | MAP_FIXED, -1, 0); } t->write_bytes_helper(mapping.map.start(), buffer.size(), buffer.data()); // We replace the entire mapping even if part of it falls outside the desired range. // That's OK, this replacement preserves behaviour, it's simpler, even if a bit // less efficient in weird cases. mem.erase(mapping.map); KernelMapping anonymous_km(mapping.map.start(), mapping.map.end(), string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, mapping.map.prot(), mapping.map.flags() | MAP_ANONYMOUS); Mapping new_mapping(anonymous_km, mapping.recorded_map); mem[new_mapping.map] = new_mapping; } }; for_each_in_range(range.start(), range.size(), fixer); coalesce_around(t, mem.find(range)); } KernelMapping AddressSpace::vdso() const { DEBUG_ASSERT(!vdso_start_addr.is_null()); return mapping_of(vdso_start_addr).map; } /** * Iterate over /proc/maps segments for a task and verify that the * task's cached mapping matches the kernel's (given a lenient fuzz * factor). */ void AddressSpace::verify(Task* t) const { ASSERT(t, task_set().end() != task_set().find(t)); if (thread_group_in_exec(t)) { return; } LOG(debug) << "Verifying address space for task " << t->tid; MemoryMap::const_iterator mem_it = mem.begin(); KernelMapIterator kernel_it(t); if (kernel_it.at_end()) { LOG(debug) << "Task " << t->tid << " exited unexpectedly, ignoring"; return; } while (!kernel_it.at_end() && mem_it != mem.end()) { KernelMapping km = kernel_it.current(); ++kernel_it; while (!kernel_it.at_end()) { KernelMapping next_km = kernel_it.current(); if (!try_merge_adjacent(&km, next_km)) { break; } ++kernel_it; } KernelMapping vm = mem_it->second.map; ++mem_it; while (mem_it != mem.end() && try_merge_adjacent(&vm, mem_it->second.map)) { ++mem_it; } assert_segments_match(t, vm, km); } ASSERT(t, kernel_it.at_end() && mem_it == mem.end()); } // Just a place that rr's AutoSyscall functionality can use as a syscall // instruction in rr's address space for use before we have exec'd. extern "C" { // Mark this as hidden, otherwise we might get the address of the GOT entry, // which could cause problems. 
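// Before the initial exec, a forked child still shares rr's own memory
// layout, so the AddressSpace constructor below can point
// traced_syscall_ip_ at this label inside rr itself and use it for remote
// syscalls.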
extern char rr_syscall_addr __attribute__ ((visibility ("hidden"))); } static void __attribute__((noinline, used)) fake_syscall() { __asm__ __volatile__(".global rr_syscall_addr\n\t"); #ifdef __i386__ __asm__ __volatile__("rr_syscall_addr: int $0x80\n\t" "nop\n\t" "nop\n\t" "nop\n\t"); #elif defined(__x86_64__) __asm__ __volatile__("rr_syscall_addr: syscall\n\t" "nop\n\t" "nop\n\t" "nop\n\t"); #elif defined(__aarch64__) __asm__ __volatile__("rr_syscall_addr: svc #0\n\t" "nop\n\t" "nop\n\t" "nop\n\t"); #endif } AddressSpace::AddressSpace(Task* t, const string& exe, uint32_t exec_count) : exe(exe), leader_tid_(t->rec_tid), leader_serial(t->tuid().serial()), exec_count(exec_count), session_(&t->session()), monkeypatch_state(t->session().is_recording() ? new Monkeypatcher() : nullptr), syscallbuf_enabled_(false), do_breakpoint_fault_addr_(nullptr), stopping_breakpoint_table_(nullptr), stopping_breakpoint_table_entry_size_(0), first_run_event_(0) { // TODO: this is a workaround of // https://github.com/rr-debugger/rr/issues/1113 . if (session_->done_initial_exec()) { populate_address_space(t); DEBUG_ASSERT(!vdso_start_addr.is_null()); } else { // Setup traced_syscall_ip_ now because we need to do AutoRemoteSyscalls // (for open_mem_fd) before the first exec. We rely on the fact that we // haven't execed yet, so the address space layout is the same. traced_syscall_ip_ = remote_code_ptr((uintptr_t)&rr_syscall_addr); } } // Does not copy the task set; the new AddressSpace will be for new tasks. AddressSpace::AddressSpace(Session* session, const AddressSpace& o, pid_t leader_tid, uint32_t leader_serial, uint32_t exec_count) : exe(o.exe), leader_tid_(leader_tid), leader_serial(leader_serial), exec_count(exec_count), brk_start(o.brk_start), brk_end(o.brk_end), mem(o.mem), shm_sizes(o.shm_sizes), monitored_mem(o.monitored_mem), dont_fork(o.dont_fork), wipe_on_fork(o.wipe_on_fork), session_(session), vdso_start_addr(o.vdso_start_addr), monkeypatch_state(o.monkeypatch_state ? new Monkeypatcher(*o.monkeypatch_state) : nullptr), traced_syscall_ip_(o.traced_syscall_ip_), privileged_traced_syscall_ip_(o.privileged_traced_syscall_ip_), syscallbuf_enabled_(o.syscallbuf_enabled_), do_breakpoint_fault_addr_(o.do_breakpoint_fault_addr_), stopping_breakpoint_table_(o.stopping_breakpoint_table_), stopping_breakpoint_table_entry_size_(o.stopping_breakpoint_table_entry_size_), saved_auxv_(o.saved_auxv_), saved_interpreter_base_(o.saved_interpreter_base_), saved_ld_path_(o.saved_ld_path_), last_free_memory(o.last_free_memory), first_run_event_(0) { for (auto& m : mem) { // The original address space continues to have exclusive ownership of // all local mappings. m.second.local_addr = nullptr; } for (auto& it : o.breakpoints) { breakpoints.insert(make_pair(it.first, it.second)); } for (auto& it : o.watchpoints) { watchpoints.insert(make_pair(it.first, it.second)); } if (session != o.session()) { // Cloning into a new session means we're checkpointing. first_run_event_ = o.first_run_event_; } // cloned tasks will automatically get cloned debug registers and // cloned address-space memory, so we don't need to do any more work here. } bool AddressSpace::post_vm_clone(Task* t) { if (has_mapping(preload_thread_locals_start()) && (mapping_flags_of(preload_thread_locals_start()) & AddressSpace::Mapping::IS_THREAD_LOCALS) == 0) { // The tracee already has a mapping at this address that doesn't belong to // us. Don't touch it. return false; } // Otherwise, the preload_thread_locals mapping is nonexistent or ours. 
// Recreate it. AutoRemoteSyscalls remote(t); t->session().create_shared_mmap(remote, PRELOAD_THREAD_LOCALS_SIZE, preload_thread_locals_start(), "preload_thread_locals"); mapping_flags_of(preload_thread_locals_start()) |= AddressSpace::Mapping::IS_THREAD_LOCALS; return true; } static bool try_split_unaligned_range(MemoryRange& range, size_t bytes, vector& result) { if ((range.start().as_int() & (bytes - 1)) || range.size() < bytes) { return false; } result.push_back(MemoryRange(range.start(), bytes)); range = MemoryRange(range.start() + bytes, range.end()); return true; } static vector split_range(const MemoryRange& range) { vector result; MemoryRange r = range; while (r.size() > 0) { if ((sizeof(void*) < 8 || !try_split_unaligned_range(r, 8, result)) && !try_split_unaligned_range(r, 4, result) && !try_split_unaligned_range(r, 2, result)) { bool ret = try_split_unaligned_range(r, 1, result); DEBUG_ASSERT(ret); } } return result; } static void configure_watch_registers(vector& regs, const MemoryRange& range, WatchType type, vector* assigned_regs, AddressSpace::WatchpointAlignment alignment) { if (alignment == AddressSpace::UNALIGNED) { regs.push_back(WatchConfig(range.start(), range.size(), type)); return; } // Zero-sized WatchConfigs return no ranges here, so are ignored. auto split_ranges = split_range(range); if (type == WATCH_WRITE && range.size() > 1) { // We can suppress spurious write-watchpoint triggerings by checking // whether memory values have changed. So we can sometimes conserve // debug registers by upgrading an unaligned range to an aligned range // of a larger size. uintptr_t align; if (range.size() <= 2) { align = 2; } else if (range.size() <= 4 || sizeof(void*) <= 4) { align = 4; } else { align = 8; } remote_ptr aligned_start(range.start().as_int() & ~(align - 1)); remote_ptr aligned_end((range.end().as_int() + (align - 1)) & ~(align - 1)); auto split = split_range(MemoryRange(aligned_start, aligned_end)); // If the aligned range doesn't reduce register usage, use the original // split to avoid spurious triggerings if (split.size() < split_ranges.size()) { split_ranges = split; } } for (auto& r : split_ranges) { if (assigned_regs) { assigned_regs->push_back(regs.size()); } regs.push_back(WatchConfig(r.start(), r.size(), type)); } } vector AddressSpace::get_watchpoints_internal( WatchpointFilter filter, WatchpointAlignment alignment, UpdateWatchpointRegisterAssignments update_watchpoint_register_assignments) { vector result; for (auto& kv : watchpoints) { if (filter == CHANGED_WATCHPOINTS) { if (!kv.second.changed) { continue; } kv.second.changed = false; } vector* assigned_regs = nullptr; if (update_watchpoint_register_assignments == UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS) { kv.second.debug_regs_for_exec_read.clear(); assigned_regs = &kv.second.debug_regs_for_exec_read; } const MemoryRange& r = kv.first; int watching = kv.second.watched_bits(); if (EXEC_BIT & watching) { configure_watch_registers(result, r, WATCH_EXEC, assigned_regs, alignment); } if (READ_BIT & watching) { configure_watch_registers(result, r, WATCH_READWRITE, assigned_regs, alignment); } else if (WRITE_BIT & watching) { configure_watch_registers(result, r, WATCH_WRITE, nullptr, alignment); } } return result; } bool AddressSpace::has_any_watchpoint_changes() { for (auto& kv : watchpoints) { if (kv.second.changed) { return true; } } return false; } bool AddressSpace::has_exec_watchpoint_fired(remote_code_ptr addr) { for (auto& kv : watchpoints) { if (kv.second.changed && kv.second.exec_count > 0 && 
kv.first.start() == addr.to_data_ptr()) { return true; } } return false; } bool AddressSpace::allocate_watchpoints() { vector regs = get_watchpoints_internal(ALL_WATCHPOINTS, ALIGNED, UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS); if (task_set().empty()) { // We can't validate the watchpoint set in this case FATAL() << "No tasks???"; } if ((*task_set().begin())->set_debug_regs(regs)) { return true; } for (auto kv : watchpoints) { kv.second.debug_regs_for_exec_read.clear(); } return false; } static inline void assert_coalescable(Task* t, const AddressSpace::Mapping& lower, const AddressSpace::Mapping& higher) { ASSERT(t, lower.emu_file == higher.emu_file); ASSERT(t, lower.flags == higher.flags); ASSERT(t, (lower.local_addr == 0 && higher.local_addr == 0) || lower.local_addr + lower.map.size() == higher.local_addr); ASSERT(t, !lower.monitored_shared_memory && !higher.monitored_shared_memory); } static bool is_coalescable(const AddressSpace::Mapping& mleft, const AddressSpace::Mapping& mright) { if (!is_adjacent_mapping(mleft.map, mright.map, RESPECT_HEAP) || !is_adjacent_mapping(mleft.recorded_map, mright.recorded_map, RESPECT_HEAP)) { return false; } return mleft.flags == mright.flags; } void AddressSpace::coalesce_around(Task* t, MemoryMap::iterator it) { auto first_kv = it; while (mem.begin() != first_kv) { auto next = first_kv; --first_kv; if (!is_coalescable(first_kv->second, next->second)) { first_kv = next; break; } assert_coalescable(t, first_kv->second, next->second); } auto last_kv = it; while (true) { auto prev = last_kv; ++last_kv; if (mem.end() == last_kv || !is_coalescable(prev->second, last_kv->second)) { last_kv = prev; break; } assert_coalescable(t, prev->second, last_kv->second); } ASSERT(t, last_kv != mem.end()); if (first_kv == last_kv) { LOG(debug) << " no mappings to coalesce"; return; } Mapping new_m(first_kv->second.map.extend(last_kv->first.end()), first_kv->second.recorded_map.extend(last_kv->first.end()), first_kv->second.emu_file, clone_stat(first_kv->second.mapped_file_stat), first_kv->second.local_addr); new_m.flags = first_kv->second.flags; LOG(debug) << " coalescing " << new_m.map; // monitored-memory currently isn't coalescable so we don't need to // adjust monitored_mem mem.erase(first_kv, ++last_kv); auto ins = mem.insert(MemoryMap::value_type(new_m.map, new_m)); DEBUG_ASSERT(ins.second); // key didn't already exist } void AddressSpace::destroy_breakpoint(BreakpointMap::const_iterator it) { if (task_set().empty()) { return; } Task* t = first_running_task(); if (!t) { return; } auto ptr = it->first.to_data_ptr(); auto data = it->second.overwritten_data; if (bkpt_instruction_length(arch()) == 1) { LOG(debug) << "Writing back " << HEX(data[0]) << " at " << ptr; } else { LOG(debug) << "Writing back " << bkpt_instruction_length(arch()) << " bytes at " << ptr; } t->write_bytes_helper(ptr, bkpt_instruction_length(arch()), data, nullptr, Task::IS_BREAKPOINT_RELATED); breakpoints.erase(it); } void AddressSpace::maybe_update_breakpoints(Task* t, remote_ptr addr, size_t len) { for (auto& it : breakpoints) { remote_ptr bp_addr = it.first.to_data_ptr(); if (addr <= bp_addr && bp_addr < addr + len - 1) { // This breakpoint was overwritten. Note the new data and reset the // breakpoint. 
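      // I.e. if the tracee (or a debugger) writes over a planted trap
      // instruction, whatever was written becomes the new "original" data
      // and the trap is immediately re-planted on top of it.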
bool ok = true; t->read_bytes_helper(bp_addr, bkpt_instruction_length(arch()), &it.second.overwritten_data, &ok); ASSERT(t, ok); t->write_bytes_helper(bp_addr, bkpt_instruction_length(arch()), breakpoint_insn(arch())); } } } void AddressSpace::for_each_in_range( remote_ptr addr, ssize_t num_bytes, function f, int how) { remote_ptr region_start = floor_page_size(addr); remote_ptr last_unmapped_end = region_start; remote_ptr region_end = ceil_page_size(addr + num_bytes); while (last_unmapped_end < region_end) { // Invariant: |rem| is always exactly the region of // memory remaining to be examined for pages to be // unmapped. MemoryRange rem(last_unmapped_end, region_end); // The next page to iterate may not be contiguous with // the last one seen. auto it = mem.lower_bound(rem); if (mem.end() == it) { LOG(debug) << " not found, done."; return; } // Don't make a reference here. |f| is allowed to erase Mappings. MemoryRange range = it->first; if (rem.end() <= range.start()) { LOG(debug) << " mapping at " << range.start() << " out of range, done."; return; } if (ITERATE_CONTIGUOUS == how && !(range.start() < region_start || rem.start() == range.start())) { LOG(debug) << " discontiguous mapping at " << range.start() << ", done."; return; } f(it->second, rem); // Maintain the loop invariant. last_unmapped_end = range.end(); } } void AddressSpace::map_and_coalesce( Task* t, const KernelMapping& m, const KernelMapping& recorded_map, EmuFile::shr_ptr emu_file, unique_ptr mapped_file_stat, void* local_addr, shared_ptr monitored) { LOG(debug) << " mapping " << m; if (monitored) { monitored_mem.insert(m.start()); } auto ins = mem.insert(MemoryMap::value_type( m, Mapping(m, recorded_map, emu_file, std::move(mapped_file_stat), local_addr, std::move(monitored)))); coalesce_around(t, ins.first); update_watchpoint_values(m.start(), m.end()); } static bool could_be_stack(const KernelMapping& km) { // On 4.1.6-200.fc22.x86_64 we observe that during exec of the rr_exec_stub // during replay, when the process switches from 32-bit to 64-bit, the 64-bit // registers seem truncated to 32 bits during the initial PTRACE_GETREGS so // our sp looks wrong and /proc//maps doesn't identify the region as // stack. // On stub execs there should only be one read-writable memory area anyway. return km.prot() == (PROT_READ | PROT_WRITE) && km.fsname() == "" && km.device() == KernelMapping::NO_DEVICE && km.inode() == KernelMapping::NO_INODE; } static dev_t check_device(const KernelMapping& km) { if (km.fsname().c_str()[0] != '/') { return km.device(); } // btrfs files can return the wrong device number in /proc//maps struct stat st; int ret = stat(km.fsname().c_str(), &st); if (ret < 0) { return km.device(); } return st.st_dev; } void AddressSpace::populate_address_space(Task* t) { bool found_proper_stack = false; for (KernelMapIterator it(t); !it.at_end(); ++it) { auto& km = it.current(); if (km.is_stack()) { found_proper_stack = true; } } // If we're being recorded by rr, we'll see the outer rr's rr_page and // preload_thread_locals. In post_exec() we'll remap those with our // own mappings. That's OK because a) the rr_page contents are the same // anyway and immutable and b) the preload_thread_locals page is only // used by the preload library, and the preload library only knows about // the inner rr. I.e. as far as the outer rr is concerned, the tracee is // not doing syscall buffering. 
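  // Two passes over the kernel maps: the first (above) checked whether the
  // kernel labels any region [stack]; if it doesn't (as we've seen after
  // exec of the replay stub), the loop below falls back to the
  // could_be_stack() heuristic while mapping everything in.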
  int found_stacks = 0;
  for (KernelMapIterator it(t); !it.at_end(); ++it) {
    auto& km = it.current();
    int flags = km.flags();
    remote_ptr<void> start = km.start();
    bool is_stack = found_proper_stack ? km.is_stack() : could_be_stack(km);
    if (is_stack) {
      ++found_stacks;
      flags |= MAP_GROWSDOWN;
      if (uses_invisible_guard_page()) {
        // MAP_GROWSDOWN segments really occupy one additional page before
        // the start address shown by /proc/<pid>/maps --- unless that page
        // is already occupied by another mapping.
        if (!has_mapping(start - page_size())) {
          start -= page_size();
        }
      }
    }
    map(t, start, km.end() - start, km.prot(), flags, km.file_offset_bytes(),
        km.fsname(), check_device(km), km.inode(), nullptr);
  }
  ASSERT(t, found_stacks == 1);
}

static int addr_bits(SupportedArch arch) {
  switch (arch) {
    default:
      DEBUG_ASSERT(0 && "Unknown architecture");
      RR_FALLTHROUGH;
    case x86:
      return 32;
    // Current x86-64 systems have only 48 bits of virtual address space,
    // and only the bottom half is usable by user space
    case x86_64:
      return 47;
    // Aarch64 has a 48-bit address space, with user and kernel each getting
    // their own 48 bits' worth of address space at opposite ends of the full
    // 64-bit address space.
    case aarch64:
      return 48;
  }
}

static MemoryRange adjust_range_for_stack_growth(const KernelMapping& km) {
  remote_ptr<void> start = km.start();
  if (km.flags() & MAP_GROWSDOWN) {
    start = min(start, km.end() - AddressSpace::chaos_mode_min_stack_size());
  }
  return MemoryRange(start, km.end());
}

static MemoryRange overlaps_excluded_range(const RecordSession& session,
                                           MemoryRange range) {
  for (const auto& r : session.excluded_ranges()) {
    if (r.intersects(range)) {
      return r;
    }
  }
  return MemoryRange();
}

static bool is_all_memory_excluded(const RecordSession& session) {
  for (const auto& r : session.excluded_ranges()) {
    if (r == MemoryRange::all()) {
      return true;
    }
  }
  return false;
}

// Choose a 4TB range to exclude from random mappings. This makes room for
// advanced trace analysis tools that require a large address range in tracees
// that is never mapped.
static MemoryRange choose_global_exclusion_range(const RecordSession* session) {
  if (session && is_all_memory_excluded(*session)) {
    return MemoryRange(nullptr, 0);
  }
  if (session && session->fixed_global_exclusion_range().size()) {
    // For TSAN we have a hardcoded range stored in the session.
    return session->fixed_global_exclusion_range();
  }
  const uint64_t range_size = uint64_t(4) * 1024 * 1024 * 1024 * 1024;
  while (true) {
    int bits = addr_bits(x86_64);
    uint64_t r = ((uint64_t)(uint32_t)random() << 32) | (uint32_t)random();
    uint64_t r_addr = r & ((uint64_t(1) << bits) - 1);
    r_addr = min(r_addr, (uint64_t(1) << bits) - range_size);
    remote_ptr<void> addr = floor_page_size(remote_ptr<void>(r_addr));
    MemoryRange ret(addr, (uintptr_t)range_size);
    if (!session || !overlaps_excluded_range(*session, ret).size()) {
      return ret;
    }
  }
}

MemoryRange AddressSpace::get_global_exclusion_range(
    const RecordSession* session) {
  static MemoryRange global_exclusion_range =
      choose_global_exclusion_range(session);
  return global_exclusion_range;
}

static remote_ptr<void> usable_address_space_end(Task* t) {
  return remote_ptr<void>((uint64_t(1) << addr_bits(t->arch())) - page_size());
}

static const remote_ptr<void> addr_space_start(0x40000);

remote_ptr<void> AddressSpace::chaos_mode_find_free_memory(
    RecordTask* t, size_t len, remote_ptr<void> hint) {
  if (is_all_memory_excluded(t->session())) {
    return nullptr;
  }
  MemoryRange global_exclusion_range = get_global_exclusion_range(&t->session());
  // NB: Above RR_PAGE_ADDR is probably not free anyways, but if it somehow is
  // don't hand it out again.
  static MemoryRange rrpage_so_range =
      MemoryRange(RR_PAGE_ADDR - PRELOAD_LIBRARY_PAGE_SIZE,
                  RR_PAGE_ADDR + PRELOAD_LIBRARY_PAGE_SIZE);
  // Ignore the hint half the time.
  if (hint && (random() & 1)) {
    hint = nullptr;
  }
  remote_ptr<void> start = hint;
  if (!start) {
    // Half the time, try to allocate at a completely random address. The other
    // half of the time, we'll try to allocate immediately before or after a
    // randomly chosen existing mapping.
    if (random() % 2) {
      uint64_t r = ((uint64_t)(uint32_t)random() << 32) | (uint32_t)random();
      start = floor_page_size(
          remote_ptr<void>(r & ((uint64_t(1) << addr_bits(t->arch())) - 1)));
    } else {
      ASSERT(t, !mem.empty());
      int map_index = random() % mem.size();
      int map_count = 0;
      for (const auto& m : maps()) {
        if (map_count == map_index) {
          start = m.map.start();
          break;
        }
        ++map_count;
      }
    }
  }
  // Reserve 3 pages at the end of userspace in case Monkeypatcher wants
  // to allocate something there.
  uint64_t reserve_area_for_monkeypatching = 3 * page_size();
  remote_ptr<void> addr_space_end =
      usable_address_space_end(t) - reserve_area_for_monkeypatching;
  // Clamp start so that we're in the usable address space.
  start = max(start, addr_space_start);
  start = min(start, addr_space_end - len);
  // Search the address space in one direction all the way to the end,
  // then in the other direction.
  int direction = (random() % 2) ? 1 : -1;
  remote_ptr<void> addr;
  for (int iteration = 0; iteration < 2; ++iteration) {
    // Invariant: [addr, addr+len) is always in the usable address space
    // [addr_space_start, addr_space_end).
    addr = start;
    while (true) {
      // Look for any reserved address space that overlaps [addr, addr+len]
      // and store any overlapping range here. If multiple reserved areas
      // overlap, we just pick one arbitrarily.
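/* [Illustrative aside, not part of rr] The shape of the search loop here:
 * walk in one direction, hopping over each conflicting reserved interval,
 * then retry in the other direction. Standalone sketch over plain uint64_t
 * half-open intervals [start, end); 0 doubles as the "not found" result and
 * all names are hypothetical. rr consults its mapping index rather than a
 * flat vector. */
struct ToyRange { uint64_t start, end; };
static uint64_t toy_find_free(const std::vector<ToyRange>& reserved,
                              uint64_t lo, uint64_t hi, uint64_t len,
                              uint64_t start, int direction) {
  uint64_t addr = start;
  while (true) {
    const ToyRange* hit = nullptr;
    for (const auto& r : reserved) {
      if (r.start < addr + len && addr < r.end) { hit = &r; break; }
    }
    if (!hit) return addr;                  // [addr, addr+len) is free
    if (direction < 0) {
      if (hit->start < lo + len) return 0;  // ran off the low end
      addr = hit->start - len;              // retry just below the conflict
    } else {
      if (hit->end + len > hi) return 0;    // ran off the high end
      addr = hit->end;                      // retry just above the conflict
    }
  }
}
// A caller tries direction = +1 then -1 (or vice versa) before giving up,
// mirroring the two-iteration outer loop above.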
MemoryRange overlapping_range; Maps m = maps_containing_or_after(addr); if (m.begin() != m.end()) { MemoryRange range = adjust_range_for_stack_growth(m.begin()->map); if (range.start() < addr + len) { overlapping_range = range; } } if (!overlapping_range.size()) { MemoryRange r(addr, ceil_page_size(len)); if (r.intersects(rrpage_so_range)) { overlapping_range = rrpage_so_range; } else if (r.intersects(global_exclusion_range)) { overlapping_range = global_exclusion_range; } else if (!t->session().excluded_ranges().empty()) { ASSERT(t, word_size(t->arch()) >= 8) << "Chaos mode with ASAN/TSAN not supported in 32-bit processes"; MemoryRange excluded = overlaps_excluded_range(t->session(), r); if (excluded.size()) { overlapping_range = excluded; } } } if (!overlapping_range.size()) { // No overlap and the range fits into our address space. Stop. return addr; } if (direction == -1) { // Try moving backwards to allocate just before the start of // the overlapping range. if (overlapping_range.start() < addr_space_start + len) { break; } addr = overlapping_range.start() - len; } else { // Try moving forwards to allocate just after the end of // the overlapping range. if (overlapping_range.end() + len > addr_space_end) { break; } addr = overlapping_range.end(); } } direction = -direction; } return nullptr; } remote_ptr AddressSpace::find_free_memory(Task* t, size_t required_space, remote_ptr after, FindFreeMemoryPolicy policy) { if (after < last_free_memory && policy == FindFreeMemoryPolicy::USE_LAST_FREE_HINT) { // Search for free memory starting at the last place we finished // our search. This is more efficient than starting at the beginning // every time. after = last_free_memory; } remote_ptr addr_space_end = usable_address_space_end(t); ASSERT(t, required_space < UINT64_MAX - addr_space_end.as_int()); bool started_from_beginning = after.is_null(); while (true) { auto maps = maps_starting_at(after); auto current = maps.begin(); while (current != maps.end()) { auto next = current; ++next; remote_ptr end_of_free_space; if (next == maps.end()) { end_of_free_space = addr_space_end; } else { end_of_free_space = min(addr_space_end, next->map.start()); } if (current->map.end() + required_space <= end_of_free_space) { return current->map.end(); } current = next; } if (started_from_beginning) { return nullptr; } started_from_beginning = true; after = addr_space_start; } } void AddressSpace::add_stap_semaphore_range(Task* task, MemoryRange range) { ASSERT(task, range.start() != range.end()) << "Unexpected zero-length SystemTap semaphore range: " << range; ASSERT(task, (range.size() & 1) == 0) << "Invalid SystemTap semaphore range at " << range << ": size is not a multiple of the size of a STap semaphore!"; auto ptr = range.start().cast(), end = range.end().cast(); for (; ptr < end; ++ptr) { stap_semaphores.insert(ptr); } } void AddressSpace::remove_stap_semaphore_range(Task* task, MemoryRange range) { ASSERT(task, range.start() != range.end()) << "Unexpected zero-length SystemTap semaphore range: " << range; ASSERT(task, (range.size() & 1) == 0) << "Invalid SystemTap semaphore range at " << range << ": size is not a multiple of the size of a STap semaphore!"; auto ptr = range.start().cast(), end = range.end().cast(); for (; ptr < end; ++ptr) { stap_semaphores.erase(ptr); } } bool AddressSpace::is_stap_semaphore(remote_ptr addr) { return stap_semaphores.find(addr) != stap_semaphores.end(); } void AddressSpace::fd_tables_changed() { if (!session()->is_recording()) { // All modifications are recorded during 
record return; } if (!syscallbuf_enabled()) { return; } DEBUG_ASSERT(task_set().size() != 0); uint8_t fdt_uniform = true; RecordTask* rt = static_cast(first_running_task()); if (!rt) { return; } auto fdt = rt->fd_table(); for (auto* t : task_set()) { if (t->fd_table() != fdt) { fdt_uniform = false; } } auto addr = REMOTE_PTR_FIELD(rt->preload_globals, fdt_uniform); bool ok = true; if (rt->read_mem(addr, &ok) != fdt_uniform) { if (!ok) { return; } rt->write_mem(addr, fdt_uniform); rt->record_local(addr, sizeof(fdt_uniform), &fdt_uniform); } } bool AddressSpace::range_is_private_mapping(const MemoryRange& range) const { MemoryRange r = range; while (r.size() > 0) { if (!has_mapping(r.start())) { return false; } const AddressSpace::Mapping& m = mapping_of(r.start()); if (!(m.map.flags() & MAP_PRIVATE)) { return false; } if (m.map.end() >= r.end()) { return true; } r = MemoryRange(m.map.end(), r.end()); } return true; } } // namespace rr rr-5.7.0/src/AddressSpace.h000066400000000000000000001255431450675474200154770ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_ADDRESS_SPACE_H_ #define RR_ADDRESS_SPACE_H_ #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "EmuFs.h" #include "HasTaskSet.h" #include "MemoryRange.h" #include "Monkeypatcher.h" #include "TaskishUid.h" #include "TraceStream.h" #include "core.h" #include "kernel_abi.h" #include "log.h" #include "remote_code_ptr.h" #include "util.h" namespace rr { class AutoRemoteSyscalls; class MonitoredSharedMemory; class RecordSession; class RecordTask; class Session; class Task; /** * Records information that the kernel knows about a mapping. This includes * everything returned through /proc//maps but also information that * we know from observing mmap and mprotect calls. */ class KernelMapping : public MemoryRange { public: /** * These are the flags we track internally to distinguish * between adjacent segments. For example, the kernel * considers a NORESERVE anonymous mapping that's adjacent to * a non-NORESERVE mapping distinct, even if all other * metadata are the same. See |is_adjacent_mapping()|. 
*/ static const int map_flags_mask = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_SHARED | MAP_STACK | MAP_GROWSDOWN; static const int checkable_flags_mask = MAP_PRIVATE | MAP_SHARED; static const dev_t NO_DEVICE = 0; static const ino_t NO_INODE = 0; KernelMapping() : device_(0), inode_(0), prot_(0), flags_(0), offset(0) {} KernelMapping(remote_ptr start, remote_ptr end, const std::string& fsname, dev_t device, ino_t inode, int prot, int flags, off64_t offset = 0) : MemoryRange(start, end), fsname_(fsname), device_(device), inode_(inode), prot_(prot), flags_(flags & map_flags_mask), offset(offset) { assert_valid(); } KernelMapping(const KernelMapping& o) : MemoryRange(o), fsname_(o.fsname_), device_(o.device_), inode_(o.inode_), prot_(o.prot_), flags_(o.flags_), offset(o.offset) { assert_valid(); } KernelMapping operator=(const KernelMapping& o) { this->~KernelMapping(); new (this) KernelMapping(o); return *this; } void assert_valid() const { DEBUG_ASSERT(end() >= start()); DEBUG_ASSERT(size() % page_size() == 0); DEBUG_ASSERT(!(flags_ & ~map_flags_mask)); DEBUG_ASSERT(offset % page_size() == 0); } KernelMapping extend(remote_ptr end) const { DEBUG_ASSERT(end >= MemoryRange::end()); return KernelMapping(start(), end, fsname_, device_, inode_, prot_, flags_, offset); } KernelMapping set_range(remote_ptr start, remote_ptr end) const { return KernelMapping(start, end, fsname_, device_, inode_, prot_, flags_, offset); } KernelMapping subrange(remote_ptr start, remote_ptr end) const { DEBUG_ASSERT(start >= MemoryRange::start() && end <= MemoryRange::end()); return KernelMapping( start, end, fsname_, device_, inode_, prot_, flags_, offset + (is_real_device() ? start - MemoryRange::start() : 0)); } KernelMapping set_prot(int prot) const { return KernelMapping(start(), end(), fsname_, device_, inode_, prot, flags_, offset); } /** * Dump a representation of |this| to a string in a format * similar to the former part of /proc/[tid]/maps. */ std::string str() const { char str[200]; sprintf(str, "%8p-%8p %c%c%c%c %08" PRIx64 " %02x:%02x %-10ld ", (void*)start().as_int(), (void*)end().as_int(), (PROT_READ & prot_) ? 'r' : '-', (PROT_WRITE & prot_) ? 'w' : '-', (PROT_EXEC & prot_) ? 'x' : '-', (MAP_SHARED & flags_) ? 's' : 'p', offset, (int)MAJOR(device()), (int)MINOR(device()), (long)inode()); return str + fsname(); } const std::string& fsname() const { return fsname_; } std::string fsname_strip_deleted() const; dev_t device() const { return device_; } ino_t inode() const { return inode_; } int prot() const { return prot_; } int flags() const { return flags_; } uint64_t file_offset_bytes() const { return offset; } /** * Return true if this file is/was backed by an external * device, as opposed to a transient RAM mapping. */ bool is_real_device() const { return device() > NO_DEVICE; } bool is_vdso() const { return fsname() == "[vdso]"; } bool is_heap() const { return fsname() == "[heap]"; } bool is_stack() const { return fsname().find("[stack") == 0; } bool is_vvar() const { return fsname() == "[vvar]"; } bool is_vsyscall() const { return fsname() == "[vsyscall]"; } struct stat fake_stat() const { struct stat fake_stat; memset(&fake_stat, 0, sizeof(fake_stat)); fake_stat.st_dev = device(); fake_stat.st_ino = inode(); fake_stat.st_size = size(); return fake_stat; } private: // The kernel's name for the mapping, as per /proc//maps. This must // be exactly correct. 
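/* [Illustrative aside, not part of rr] str() above renders a mapping in the
 * /proc/<pid>/maps shape, e.g. a private read-exec file mapping looks like:
 *   "55d0e9a4c000-55d0e9a6d000 r-xp 00002000 08:01 1835008   /usr/bin/cat"
 * A standalone printf with the same field layout; the values are made up and
 * <cstdio> is assumed available. */
static void toy_print_maps_line() {
  printf("%8p-%8p %c%c%c%c %08llx %02x:%02x %-10ld %s\n",
         (void*)0x55d0e9a4c000, (void*)0x55d0e9a6d000,
         'r', '-', 'x', 'p',        // prot bits, then 's'hared/'p'rivate
         0x2000ULL, 8, 1, 1835008L, // file offset, major:minor, inode
         "/usr/bin/cat");
}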
const std::string fsname_; // Note that btrfs has weird behavior and /proc/.../maps can show a different // device number to the device from stat()ing the file that was mapped. // https://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg57667.html // We store here the device number obtained from fstat()ing the file. // This also seems to be consistent with what we read from populate_address_space // for the initial post-exec mappings. It is NOT consistent with what we get // from reading /proc/.../maps for non-initial mappings. dev_t device_; ino_t inode_; const int prot_; const int flags_; const uint64_t offset; }; inline std::ostream& operator<<(std::ostream& o, const KernelMapping& m) { o << m.str(); return o; } /** * Compare |a| and |b| so that "subset" lookups will succeed. What * does that mean? If |a| and |b| overlap (intersect), then this * comparator considers them equivalent. That means that if |a| * represents one byte within a mapping |b|, then |a| and |b| will be * considered equivalent. * * If |a| and |b| don't overlap, return true if |a|'s start address is * less than |b|'s/ */ struct MappingComparator { bool operator()(const MemoryRange& a, const MemoryRange& b) const { return !a.intersects(b) && a.start() < b.start(); } }; enum BreakpointType { BKPT_NONE = 0, // Trap for internal rr purposes, f.e. replaying async // signals. BKPT_INTERNAL, // Trap on behalf of a debugger user. BKPT_USER, }; enum WatchType { // NB: these random-looking enumeration values are chosen to // match the numbers programmed into x86 debug registers. WATCH_EXEC = 0x00, WATCH_WRITE = 0x01, WATCH_READWRITE = 0x03 }; enum ArmWatchType { ARM_WATCH_EXEC = 0x0, ARM_WATCH_READ = 0x1, ARM_WATCH_WRITE = 0x2, ARM_WATCH_READWRITE = ARM_WATCH_READ | ARM_WATCH_WRITE }; enum ArmPrivLevel { ARM_PRIV_EL0 = 0x2 }; enum DebugStatus { DS_WATCHPOINT_ANY = 0xf, DS_SINGLESTEP = 1 << 14, }; /** * A distinct watchpoint, corresponding to the information needed to * program a single hardware watchpoint. */ struct WatchConfig { WatchConfig(remote_ptr addr, size_t num_bytes, WatchType type) : addr(addr), num_bytes(num_bytes), type(type) {} remote_ptr addr; size_t num_bytes; WatchType type; bool operator==(const WatchConfig& other) const { return addr == other.addr && num_bytes == other.num_bytes && type == other.type; } bool operator!=(const WatchConfig& other) const { return !(*this == other); } }; /** * Models the address space for a set of tasks. This includes the set * of mapped pages, and the resources those mappings refer to. */ class AddressSpace : public HasTaskSet { friend class Session; friend struct VerifyAddressSpace; public: class Mapping { public: Mapping(const KernelMapping& map, const KernelMapping& recorded_map, EmuFile::shr_ptr emu_file = nullptr, std::unique_ptr mapped_file_stat = nullptr, void* local_addr = nullptr, std::shared_ptr&& monitored = nullptr); ~Mapping(); Mapping(const Mapping&); Mapping() : local_addr(nullptr), flags(0) {} const Mapping& operator=(const Mapping& other) { this->~Mapping(); new (this) Mapping(other); return *this; } const KernelMapping map; // The corresponding KernelMapping in the recording. During recording, // equal to 'map'. const KernelMapping recorded_map; const EmuFile::shr_ptr emu_file; std::unique_ptr mapped_file_stat; // If this mapping has been mapped into the local address space, // this is the address of the first byte of the equivalent local mapping. // This mapping is always mapped as PROT_READ|PROT_WRITE regardless of the // mapping's permissions in the tracee. 
Also note that it is the caller's // responsibility to keep this alive at least as long as this mapping is // present in the address space. uint8_t* local_addr; const std::shared_ptr monitored_shared_memory; // Flags indicate mappings that require special handling. Adjacent mappings // may only be merged if their `flags` value agree. enum : uint32_t { FLAG_NONE = 0x0, // This mapping represents a syscallbuf. It needs to handled specially // during checksumming since its contents are not fully restored by the // replay. IS_SYSCALLBUF = 0x1, // This mapping is used as our thread-local variable area for this // address space IS_THREAD_LOCALS = 0x2, // This mapping is used for syscallbuf patch stubs IS_PATCH_STUBS = 0x4, // This mapping is the rr page IS_RR_PAGE = 0x8 }; uint32_t flags; }; typedef std::map MemoryMap; typedef std::shared_ptr shr_ptr; virtual ~AddressSpace(); /** * Call this after a successful execve syscall has completed. At this point * it is safe to perform remote syscalls. */ void post_exec_syscall(Task* t); /** * Change the program data break of this address space to * |addr|. Only called during recording! */ void brk(Task* t, remote_ptr addr, int prot); /** * This can only be called during recording. */ remote_ptr current_brk() const { DEBUG_ASSERT(!brk_end.is_null()); return brk_end; } /** * Dump a representation of |this| to stderr in a format * similar to /proc/[tid]/maps. * * XXX/ostream-ify me. */ void dump() const; /** * Return tid of the first task for this address space. */ pid_t leader_tid() const { return leader_tid_; } /** * Return AddressSpaceUid for this address space. */ AddressSpaceUid uid() const { return AddressSpaceUid(leader_tid_, leader_serial, exec_count); } Session* session() const { return session_; } SupportedArch arch() const; /** * Return the path this address space was exec()'d with. */ const std::string& exe_image() const { return exe; } const std::string& interp_name() const { return interp_name_; } void set_interp_name(std::string name) { interp_name_ = name; } remote_ptr interp_base() const { return interp_base_; } void set_interp_base(remote_ptr base) { interp_base_ = base; } /** * Assuming the last retired instruction has raised a SIGTRAP * and might be a breakpoint trap instruction, return the type * of breakpoint set at |ip() - sizeof(breakpoint_insn)|, if * one exists. Otherwise return TRAP_NONE. */ BreakpointType get_breakpoint_type_for_retired_insn(remote_code_ptr ip); /** * Return the type of breakpoint that's been registered for * |addr|. */ BreakpointType get_breakpoint_type_at_addr(remote_code_ptr addr); /** * Check if the user has placed a hardware EXEC watchpoint at addr. */ bool is_exec_watchpoint(remote_code_ptr addr); /** * Returns true when the breakpoint at |addr| is in private * non-writeable memory. When this returns true, the breakpoint can't be * overwritten by the tracee without an intervening mprotect or mmap * syscall. */ bool is_breakpoint_in_private_read_only_memory(remote_code_ptr addr); /** * Return true if there's a breakpoint instruction at |ip|. This might * be an explicit instruction, even if there's no breakpoint set via our API. */ bool is_breakpoint_instruction(Task* t, remote_code_ptr ip); /** * The buffer |dest| of length |length| represents the contents of tracee * memory at |addr|. Replace the bytes in |dest| that have been overwritten * by breakpoints with the original data that was replaced by the breakpoints. 
*/ void replace_breakpoints_with_original_values(uint8_t* dest, size_t length, remote_ptr addr); /** * Map |num_bytes| into this address space at |addr|, with * |prot| protection and |flags|. The pages are (possibly * initially) backed starting at |offset| of |res|. |fsname|, |device| and * |inode| are values that will appear in the /proc//maps entry. * |mapped_file_stat| is a complete copy of the 'stat' data for the mapped * file, or null if this isn't a file mapping or isn't during recording. * |*recorded_map| is the mapping during recording, or null if the mapping * during recording is known to be the same as the new map (e.g. because * we are recording!). * |local_addr| is the local address of the memory shared with the tracee, * or null if it's not shared with the tracee. AddressSpace takes ownership * of the shared memory and is responsible for unmapping it. */ KernelMapping map( Task* t, remote_ptr addr, size_t num_bytes, int prot, int flags, off64_t offset_bytes, const std::string& fsname, dev_t device = KernelMapping::NO_DEVICE, ino_t inode = KernelMapping::NO_INODE, std::unique_ptr mapped_file_stat = nullptr, const KernelMapping* recorded_map = nullptr, EmuFile::shr_ptr emu_file = nullptr, void* local_addr = nullptr, std::shared_ptr monitored = nullptr); /** * Return the mapping and mapped resource for the byte at address 'addr'. * There must be such a mapping. */ const Mapping& mapping_of(remote_ptr addr) const; /** * Detach local mapping and return it. */ void* detach_local_mapping(remote_ptr addr); /** * Return a reference to the flags of the mapping at this address, allowing * manipulation. There must exist a mapping at `addr`. */ uint32_t& mapping_flags_of(remote_ptr addr); /** * Return true if there is some mapping for the byte at 'addr'. */ bool has_mapping(remote_ptr addr) const; /** * If the given memory region is mapped into the local address space, obtain * the local address from which the `size` bytes at `addr` can be accessed. */ uint8_t* local_mapping(remote_ptr addr, size_t size); /** * Return true if the rr page is mapped at its expected address. */ bool has_rr_page() const; /** * Object that generates robust iterators through the memory map. The * memory map can be updated without invalidating iterators, as long as * Mappings are not added or removed. */ class Maps { public: Maps(const AddressSpace& outer, remote_ptr start) : outer(outer), start(start) {} class iterator { public: iterator(const iterator& it) = default; const iterator& operator++() { ptr = to_it()->second.map.end(); return *this; } bool operator==(const iterator& other) const { return to_it() == other.to_it(); } bool operator!=(const iterator& other) const { return !(*this == other); } const Mapping* operator->() const { return &to_it()->second; } const Mapping& operator*() const { return to_it()->second; } iterator& operator=(const iterator& other) { this->~iterator(); new (this) iterator(other); return *this; } private: friend class Maps; iterator(const MemoryMap& outer, remote_ptr ptr) : outer(outer), ptr(ptr), at_end(false) {} iterator(const MemoryMap& outer) : outer(outer), at_end(true) {} MemoryMap::const_iterator to_it() const { return at_end ? 
outer.end() : outer.lower_bound(MemoryRange(ptr, ptr)); } const MemoryMap& outer; remote_ptr ptr; bool at_end; }; iterator begin() const { return iterator(outer.mem, start); } iterator end() const { return iterator(outer.mem); } private: const AddressSpace& outer; remote_ptr start; }; friend class Maps; Maps maps() const { return Maps(*this, remote_ptr()); } Maps maps_starting_at(remote_ptr start) { return Maps(*this, start); } Maps maps_containing_or_after(remote_ptr start) { if (has_mapping(start)) { return Maps(*this, mapping_of(start).map.start()); } else { return Maps(*this, start); } } const std::set>& monitored_addrs() const { return monitored_mem; } /** * Change the protection bits of [addr, addr + num_bytes) to * |prot|. */ void protect(Task* t, remote_ptr addr, size_t num_bytes, int prot); /** * Fix up mprotect registers parameters to take account of PROT_GROWSDOWN. */ void fixup_mprotect_growsdown_parameters(Task* t); /** * Move the mapping [old_addr, old_addr + old_num_bytes) to * [new_addr, old_addr + new_num_bytes), preserving metadata. */ void remap(Task* t, remote_ptr old_addr, size_t old_num_bytes, remote_ptr new_addr, size_t new_num_bytes, int flags); /** * Notify that data was written to this address space by rr or * by the kernel. * |flags| can contain values from Task::WriteFlags. */ void notify_written(remote_ptr addr, size_t num_bytes, uint32_t flags); /** Ensure a breakpoint of |type| is set at |addr|. */ bool add_breakpoint(remote_code_ptr addr, BreakpointType type); /** * Remove a |type| reference to the breakpoint at |addr|. If * the removed reference was the last, the breakpoint is * destroyed. */ void remove_breakpoint(remote_code_ptr addr, BreakpointType type); /** * Destroy all breakpoints in this VM, regardless of their * reference counts. */ void remove_all_breakpoints(); /** * Temporarily remove the breakpoint at |addr|. */ void suspend_breakpoint_at(remote_code_ptr addr); /** * Restore any temporarily removed breakpoint at |addr|. */ void restore_breakpoint_at(remote_code_ptr addr); /** * Manage watchpoints. Analogous to breakpoint-managing * methods above, except that watchpoints can be set for an * address range. */ bool add_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type); void remove_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type); void remove_all_watchpoints(); std::vector all_watchpoints() { return get_watchpoints_internal(ALL_WATCHPOINTS, UNALIGNED, DONT_UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS); } /** * Save all watchpoint state onto a stack. */ void save_watchpoints(); /** * Pop all watchpoint state from the saved-state stack. */ bool restore_watchpoints(); /** * Notify that at least one watchpoint was hit --- recheck them all. * Returns true if any watchpoint actually triggered. Note that * debug_status can indicate a hit watchpoint that doesn't actually * trigger, because the value of a write-watchpoint did not change. * Likewise, debug_status can indicate a watchpoint wasn't hit that * actually was (because in some configurations, e.g. VMWare * hypervisor with 32-bit x86 guest, debug_status watchpoint bits * are known to not be set on singlestep). */ bool notify_watchpoint_fired(uintptr_t debug_status, remote_ptr hit_addr, remote_code_ptr address_of_singlestep_start); /** * Return true if any watchpoint has fired. Will keep returning true until * consume_watchpoint_changes() is called. 
*/ bool has_any_watchpoint_changes(); /** * Return true if an EXEC watchpoint has fired at addr since the last * consume_watchpoint_changes. */ bool has_exec_watchpoint_fired(remote_code_ptr addr); /** * Return all changed watchpoints in |watches| and clear their changed flags. */ std::vector consume_watchpoint_changes() { return get_watchpoints_internal(CHANGED_WATCHPOINTS, UNALIGNED, DONT_UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS); } /** * Get hardware watchpoint assignments. */ std::vector get_hw_watchpoints() { return get_watchpoints_internal(ALL_WATCHPOINTS, ALIGNED, DONT_UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS); } void set_shm_size(remote_ptr addr, size_t bytes) { shm_sizes[addr] = bytes; } /** * Dies if no shm size is registered for the address. */ size_t get_shm_size(remote_ptr addr) { return shm_sizes[addr]; } void remove_shm_size(remote_ptr addr) { shm_sizes.erase(addr); } /** * Make [addr, addr + num_bytes) inaccessible within this * address space. */ void unmap(Task* t, remote_ptr addr, ssize_t num_bytes); /** * Notification of madvise call. */ void advise(Task* t, remote_ptr addr, ssize_t num_bytes, int advice); /** Return the vdso mapping of this. */ KernelMapping vdso() const; bool has_vdso() const { return has_mapping(vdso_start_addr); } /** * Verify that this cached address space matches what the * kernel thinks it should be. */ void verify(Task* t) const; bool has_breakpoints() { return !breakpoints.empty(); } bool has_watchpoints() { return !watchpoints.empty(); } ScopedFd& mem_fd() { return child_mem_fd; } void set_mem_fd(ScopedFd&& fd) { child_mem_fd = std::move(fd); } ScopedFd& pagemap_fd() { return child_pagemap_fd; } void set_pagemap_fd(ScopedFd&& fd) { child_pagemap_fd = std::move(fd); } Monkeypatcher& monkeypatcher() { DEBUG_ASSERT(monkeypatch_state); return *monkeypatch_state; } void at_preload_init(Task* t); /* The address of the syscall instruction from which traced syscalls made by * the syscallbuf will originate. */ remote_code_ptr traced_syscall_ip() const { return traced_syscall_ip_; } /* The address of the syscall instruction from which privileged traced * syscalls made by the syscallbuf will originate. */ remote_code_ptr privileged_traced_syscall_ip() const { return privileged_traced_syscall_ip_; } bool syscallbuf_enabled() const { return syscallbuf_enabled_; } /** * We'll map a page of memory here into every exec'ed process for our own * use. */ static remote_ptr rr_page_start() { return RR_PAGE_ADDR; } static remote_ptr rr_page_end() { return rr_page_start() + PRELOAD_LIBRARY_PAGE_SIZE; } static remote_ptr preload_thread_locals_start() { return rr_page_start() + PRELOAD_LIBRARY_PAGE_SIZE; } static uint32_t preload_thread_locals_size() { return PRELOAD_THREAD_LOCALS_SIZE; } enum Traced { TRACED, UNTRACED }; enum Privileged { PRIVILEGED, UNPRIVILEGED }; /** * Depending on which entry point this is and whether or not we're recording * or replaying, the instruction in the rr page, may be something other than * a syscall. This enum encodes the combination of instructions for each entry * point: * * Enabled | Record | Replay * ---------------------|---------|------- * RECORDING_ONLY | syscall | nop * REPLAY_ONLY | nop | syscall * RECORDING_AND_REPLAY | syscall | syscall * REPLAY_ASSIST | syscall | int3 * * The REPLAY_ASSIST is used for a syscall that is untraced during record (so * we can save the context switch penalty), but requires us to apply side * effects during replay. The int3 lets the replayer stop and apply these * at the appropriate point. 
*/ enum Enabled { RECORDING_ONLY, REPLAY_ONLY, RECORDING_AND_REPLAY, REPLAY_ASSIST }; static remote_code_ptr rr_page_syscall_exit_point(Traced traced, Privileged privileged, Enabled enabled, SupportedArch arch); static remote_code_ptr rr_page_syscall_entry_point(Traced traced, Privileged privileged, Enabled enabled, SupportedArch arch); struct SyscallType { Traced traced; Privileged privileged; Enabled enabled; }; static std::vector rr_page_syscalls(); static const SyscallType* rr_page_syscall_from_exit_point( SupportedArch arch, remote_code_ptr ip); static const SyscallType* rr_page_syscall_from_entry_point( SupportedArch arch, remote_code_ptr ip); /** * Return a pointer to 8 bytes of 0xFF. * (Currently only set during record / not part of the ABI) */ static remote_ptr rr_page_record_ff_bytes() { return RR_PAGE_FF_BYTES; } /** * Locate a syscall instruction in t's VDSO. * This gives us a way to execute remote syscalls without having to write * a syscall instruction into executable tracee memory (which might not be * possible with some kernels, e.g. PaX). */ remote_code_ptr find_syscall_instruction(Task* t); /** * Task |t| just forked from this address space. Apply dont_fork and * wipe_on_fork settings. */ void did_fork_into(Task* t); void set_first_run_event(FrameTime event) { first_run_event_ = event; } FrameTime first_run_event() { return first_run_event_; } const std::vector& saved_auxv() { return saved_auxv_; } void save_auxv(Task* t); remote_ptr saved_interpreter_base() { return saved_interpreter_base_; } void save_interpreter_base(Task* t, std::vector auxv); std::string saved_ld_path() { return saved_ld_path_;} void save_ld_path(Task* t, remote_ptr); void read_mm_map(Task* t, NativeArch::prctl_mm_map* map); /** * Reads the /proc//maps entry for a specific address. Does no caching. * If performed on a file in a btrfs file system, this may return the * wrong device number! If you stick to anonymous or special file * mappings, this should be OK. */ KernelMapping read_kernel_mapping(Task* t, remote_ptr addr); /** * Same as read_kernel_mapping, but reads rr's own memory map. */ static KernelMapping read_local_kernel_mapping(uint8_t* addr); static uint32_t chaos_mode_min_stack_size() { return 8 * 1024 * 1024; } /* Returns null if we should return ENOMEM because there is no free space available. */ remote_ptr chaos_mode_find_free_memory(RecordTask* t, size_t len, remote_ptr hint); enum class FindFreeMemoryPolicy { /* Use the first free memory after `after` */ STRICT_SEARCH, /* Optimize for speed by starting the search from the address of the last area returned by find_free_memory (if greater than `after`). */ USE_LAST_FREE_HINT, }; remote_ptr find_free_memory(Task* t, size_t len, remote_ptr after = remote_ptr(), FindFreeMemoryPolicy policy = FindFreeMemoryPolicy::STRICT_SEARCH); /** * The return value indicates whether we (re)created the preload_thread_locals * area. */ bool post_vm_clone(Task* t); /** * TaskUid for the task whose locals are stored in the preload_thread_locals * area. */ const TaskUid& thread_locals_tuid() { return thread_locals_tuid_; } void set_thread_locals_tuid(const TaskUid& tuid) { thread_locals_tuid_ = tuid; } /** * Call this when the memory at [addr,addr+len) was externally overwritten. * This will attempt to update any breakpoints that may be set within the * range (resetting them and storing the new value). 
*/ void maybe_update_breakpoints(Task* t, remote_ptr addr, size_t len); /** * Call this to ensure that the mappings in `range` during replay has the same length * is collapsed to a single mapping. The caller guarantees that all the * mappings in the range can be coalesced (because they corresponded to a single * mapping during recording). * The end of the range might be in the middle of a mapping. * The start of the range might also be in the middle of a mapping. */ void ensure_replay_matches_single_recorded_mapping(Task* t, MemoryRange range); /** * Print process maps. */ static void print_process_maps(Task* t); void add_stap_semaphore_range(Task* t, MemoryRange range); void remove_stap_semaphore_range(Task* t, MemoryRange range); bool is_stap_semaphore(remote_ptr addr); bool legacy_breakpoint_mode() { return stopping_breakpoint_table_ != nullptr; } remote_code_ptr do_breakpoint_fault_addr() { return do_breakpoint_fault_addr_; } remote_code_ptr stopping_breakpoint_table() { return stopping_breakpoint_table_; } int stopping_breakpoint_table_entry_size() { return stopping_breakpoint_table_entry_size_; } // Also sets brk_ptr. enum { RRVDSO_PAGE_OFFSET = 2, RRPAGE_RECORD_PAGE_OFFSET = 3, RRPAGE_REPLAY_PAGE_OFFSET = 4 }; void map_rr_page(AutoRemoteSyscalls& remote); struct UnmapOptions { bool exclude_vdso_vvar; UnmapOptions() : exclude_vdso_vvar(false) {} }; void unmap_all_but_rr_mappings(AutoRemoteSyscalls& remote, UnmapOptions options = UnmapOptions()); void erase_task(Task* t) { this->HasTaskSet::erase_task(t); if (task_set().size() != 0) { fd_tables_changed(); } } /** * Called when the set of different fd tables associated with tasks * in this address space may have changed (e.g. a task changed its fd table, * or a task got added or removed, etc). */ void fd_tables_changed(); static MemoryRange get_global_exclusion_range(const RecordSession* session); // Whether to return WatchConfigs consisting of only aligned locations // suitable for hardware watchpoint registers. enum WatchpointAlignment { UNALIGNED, ALIGNED }; // Returns true if the range is completely covered by private mappings bool range_is_private_mapping(const MemoryRange& range) const; private: struct Breakpoint; typedef std::map BreakpointMap; class Watchpoint; /** * Called after a successful execve to set up the new AddressSpace. * Also called once for the initial spawn. */ AddressSpace(Task* t, const std::string& exe, uint32_t exec_count); /** * Called when an AddressSpace is cloned due to a fork() or a Session * clone. After this, and the task is properly set up, post_vm_clone will * be called. */ AddressSpace(Session* session, const AddressSpace& o, pid_t leader_tid, uint32_t leader_serial, uint32_t exec_count); /** * After an exec, populate the new address space of |t| with * the existing mappings we find in /proc/maps. */ void populate_address_space(Task* t); void unmap_internal(Task* t, remote_ptr addr, ssize_t num_bytes); bool update_watchpoint_value(const MemoryRange& range, Watchpoint& watchpoint); void update_watchpoint_values(remote_ptr start, remote_ptr end); // Whether to handle all watchpoints or just data watchpoints whose data // has changed. In the latter case we clear their changed status. enum WatchpointFilter { ALL_WATCHPOINTS, CHANGED_WATCHPOINTS }; // Whether to update the watchpoint's assigned register list. Use // UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS when we'll use the watchpoints // to configure HW watchpoint registers. 
enum UpdateWatchpointRegisterAssignments { UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS, DONT_UPDATE_WATCHPOINT_REGISTER_ASSIGNMENTS }; std::vector get_watchpoints_internal(WatchpointFilter filter, WatchpointAlignment alignment, UpdateWatchpointRegisterAssignments update_watchpoint_register_assignments); /** * Construct a minimal set of watchpoints to be enabled based * on |set_watchpoint()| calls, and program them for each task * in this address space. */ bool allocate_watchpoints(); /** * Merge the mappings adjacent to |it| in memory that are * semantically "adjacent mappings" of the same resource as * well, for example have adjacent file offsets and the same * prot and flags. */ void coalesce_around(Task* t, MemoryMap::iterator it); /** * Erase |it| from |breakpoints| and restore any memory in * this it may have overwritten. */ void destroy_breakpoint(BreakpointMap::const_iterator it); /** * For each mapped segment overlapping [addr, addr + * num_bytes), call |f|. Pass |f| the overlapping mapping, * the mapped resource, and the range of addresses remaining * to be iterated over. * * Pass |ITERATE_CONTIGUOUS| to stop iterating when the last * contiguous mapping after |addr| within the region is seen. * Default is to iterate all mappings in the region. * * The callback takes parameters by value to avoid dangling * references if the memory map is modified inside the callback. */ enum { ITERATE_DEFAULT, ITERATE_CONTIGUOUS }; void for_each_in_range( remote_ptr addr, ssize_t num_bytes, std::function f, int how = ITERATE_DEFAULT); /** * Map |m| of |r| into this address space, and coalesce any * mappings of |r| that are adjacent to |m|. */ void map_and_coalesce(Task* t, const KernelMapping& m, const KernelMapping& recorded_map, EmuFile::shr_ptr emu_file, std::unique_ptr mapped_file_stat, void* local_addr, std::shared_ptr monitored); void remove_from_map(const MemoryRange& range) { mem.erase(range); monitored_mem.erase(range.start()); } void add_to_map(const Mapping& m) { mem[m.map] = m; if (m.monitored_shared_memory) { monitored_mem.insert(m.map.start()); } } /** * Call this only during recording. */ template void at_preload_init_arch(Task* t); enum { EXEC_BIT = 1 << 0, READ_BIT = 1 << 1, WRITE_BIT = 1 << 2 }; /** Return the access bits above needed to watch |type|. */ static int access_bits_of(WatchType type); /** * Represents a refcount set on a particular address. Because there * can be multiple refcounts of multiple types set on a single * address, Breakpoint stores explicit USER and INTERNAL breakpoint * refcounts. Clients adding/removing breakpoints at this addr must * call ref()/unref() as appropriate. */ struct Breakpoint { Breakpoint() : internal_count(0), user_count(0) {} Breakpoint(const Breakpoint& o) = default; // AddressSpace::destroy_all_breakpoints() can cause this // destructor to be invoked while we have nonzero total // refcount, so the most we can DEBUG_ASSERT is that the refcounts // are valid. ~Breakpoint() { DEBUG_ASSERT(internal_count >= 0 && user_count >= 0); } void ref(BreakpointType which) { DEBUG_ASSERT(internal_count >= 0 && user_count >= 0); ++*counter(which); } int unref(BreakpointType which) { DEBUG_ASSERT(internal_count > 0 || user_count > 0); --*counter(which); DEBUG_ASSERT(internal_count >= 0 && user_count >= 0); return internal_count + user_count; } BreakpointType type() const { // NB: USER breakpoints need to be processed before // INTERNAL ones. We want to give the debugger a // chance to dispatch commands before we attend to the // internal rr business. 
So if there's a USER "ref" // on the breakpoint, treat it as a USER breakpoint. return user_count > 0 ? BKPT_USER : BKPT_INTERNAL; } uint8_t* original_data() { return overwritten_data; } // "Refcounts" of breakpoints set at |addr|. The breakpoint // object must be unique since we have to save the overwritten // data, and we can't enforce the order in which breakpoints // are set/removed. int internal_count, user_count; uint8_t overwritten_data[MAX_BKPT_INSTRUCTION_LENGTH]; int* counter(BreakpointType which) { DEBUG_ASSERT(BKPT_INTERNAL == which || BKPT_USER == which); int* p = BKPT_USER == which ? &user_count : &internal_count; DEBUG_ASSERT(*p >= 0); return p; } }; // XXX one is tempted to merge Breakpoint and Watchpoint into a single // entity, but the semantics are just different enough that separate // objects are easier for now. /** * Track the watched accesses of a contiguous range of memory * addresses. */ class Watchpoint { public: Watchpoint(size_t num_bytes) : exec_count(0), read_count(0), write_count(0), value_bytes(num_bytes), valid(false), changed(false) {} Watchpoint(const Watchpoint&) = default; ~Watchpoint() { assert_valid(); } void watch(int which) { assert_valid(); exec_count += (EXEC_BIT & which) != 0; read_count += (READ_BIT & which) != 0; write_count += (WRITE_BIT & which) != 0; } int unwatch(int which) { assert_valid(); if (EXEC_BIT & which) { DEBUG_ASSERT(exec_count > 0); --exec_count; } if (READ_BIT & which) { DEBUG_ASSERT(read_count > 0); --read_count; } if (WRITE_BIT & which) { DEBUG_ASSERT(write_count > 0); --write_count; } return exec_count + read_count + write_count; } int watched_bits() const { return (exec_count > 0 ? EXEC_BIT : 0) | (read_count > 0 ? READ_BIT : 0) | (write_count > 0 ? WRITE_BIT : 0); } void assert_valid() const { DEBUG_ASSERT(exec_count >= 0 && read_count >= 0 && write_count >= 0); } // Watchpoints stay alive until all watched access typed have // been cleared. We track refcounts of each watchable access // separately. int exec_count, read_count, write_count; // Debug registers allocated for read/exec access checking. // Write watchpoints are always triggered by checking for actual memory // value changes. Read/exec watchpoints can't be triggered that way, so // we look for these registers being triggered instead. std::vector debug_regs_for_exec_read; std::vector value_bytes; bool valid; bool changed; }; // All breakpoints set in this VM. BreakpointMap breakpoints; /* Path of the real executable image this address space was * exec()'d with. */ std::string exe; /* Path of the interpreter, if any, of exe. */ std::string interp_name_; /* Base address of the interpreter (might be null!) */ remote_ptr interp_base_; /* Pid of first task for this address space */ pid_t leader_tid_; /* Serial number of first task for this address space */ uint32_t leader_serial; uint32_t exec_count; // Only valid during recording remote_ptr brk_start; /* Current brk. Not necessarily page-aligned. */ remote_ptr brk_end; /* All segments mapped into this address space. */ MemoryMap mem; /* Sizes of SYSV shm segments, by address. We use this to determine the size * of memory regions unmapped via shmdt(). */ std::map, size_t> shm_sizes; std::set> monitored_mem; /* madvise DONTFORK regions */ std::set dont_fork; /* madvise WIPEONFORK regions */ std::set wipe_on_fork; // The session that created this. We save a ref to it so that // we can notify it when we die. 
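/* [Illustrative aside, not part of rr] The Watchpoint bookkeeping above keeps
 * one refcount per access kind and recomputes the watched bit-set from
 * whichever counts are still nonzero. The same pattern in miniature, using
 * the EXEC_BIT/READ_BIT/WRITE_BIT values defined earlier (1, 2, 4); names
 * are hypothetical. */
struct ToyWatch {
  int exec = 0, read = 0, write = 0;
  void watch(int bits) {
    exec += !!(bits & 1); read += !!(bits & 2); write += !!(bits & 4);
  }
  int unwatch(int bits) {
    exec -= !!(bits & 1); read -= !!(bits & 2); write -= !!(bits & 4);
    return exec + read + write;  // 0 => last watcher gone, free the slot
  }
  int bits() const {
    return (exec > 0 ? 1 : 0) | (read > 0 ? 2 : 0) | (write > 0 ? 4 : 0);
  }
};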
Session* session_; // tid of the task whose thread-locals are in preload_thread_locals TaskUid thread_locals_tuid_; /* First mapped byte of the vdso. */ remote_ptr vdso_start_addr; // The monkeypatcher that's handling this address space. std::unique_ptr monkeypatch_state; // The watchpoints set for tasks in this VM. Watchpoints are // programmed per Task, but we track them per address space on // behalf of debuggers that assume that model. std::map watchpoints; std::vector> saved_watchpoints; // Tracee memory is read and written through this fd, which is // opened for the tracee's magic /proc/[tid]/mem device. The // advantage of this over ptrace is that we can access it even // when the tracee isn't at a ptrace-stop. It's also // theoretically faster for large data transfers, which rr can // do often. // // Users of child_mem_fd should fall back to ptrace-based memory // access when child_mem_fd is not open. ScopedFd child_mem_fd; remote_code_ptr traced_syscall_ip_; remote_code_ptr privileged_traced_syscall_ip_; bool syscallbuf_enabled_; remote_code_ptr do_breakpoint_fault_addr_; // These fields are deprecated and have been replaced by the // breakpoint_value mechanism. They are retained for replayability // of old traces. remote_code_ptr stopping_breakpoint_table_; int stopping_breakpoint_table_entry_size_; std::vector saved_auxv_; remote_ptr saved_interpreter_base_; std::string saved_ld_path_; remote_ptr last_free_memory; /** * The time of the first event that ran code for a task in this address space. * 0 if no such event has occurred. */ FrameTime first_run_event_; std::set> stap_semaphores; /** * For each architecture, the offset of a syscall instruction with that * architecture's VDSO, or 0 if not known. */ static uint32_t offset_to_syscall_in_vdso[SupportedArch_MAX + 1]; /** * Ensure that the cached mapping of |t| matches /proc/maps, * using adjacent-map-merging heuristics that are as lenient * as possible given the data available from /proc/maps. */ static void check_segment_iterator(void* vasp, Task* t, const struct map_iterator_data* data); AddressSpace operator=(const AddressSpace&) = delete; ScopedFd child_pagemap_fd; }; /** * The following helper is used to iterate over a tracee's memory * map. */ class KernelMapIterator { public: KernelMapIterator(Task* t, bool* ok = nullptr); KernelMapIterator(pid_t tid, bool* ok = nullptr) : tid(tid) { init(ok); } ~KernelMapIterator(); // It's very important to keep in mind that btrfs files can have the wrong // device number! 
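/* [Illustrative aside, not part of rr] Reading another process's memory via
 * /proc/<pid>/mem, the mechanism child_mem_fd (above) wraps: pread() at the
 * remote address works without the target being at a ptrace-stop, provided
 * the caller has ptrace access rights to it. Standalone sketch with error
 * handling reduced to sentinels; assumes <fcntl.h>, <unistd.h>, <cstdio>,
 * and a 64-bit off_t. */
static ssize_t toy_read_remote(pid_t pid, uint64_t remote_addr,
                               void* buf, size_t len) {
  char path[64];
  snprintf(path, sizeof(path), "/proc/%d/mem", pid);
  int fd = open(path, O_RDONLY);
  if (fd < 0) return -1;
  ssize_t n = pread(fd, buf, len, (off_t)remote_addr);  // seek+read in one call
  close(fd);
  return n;
}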
const KernelMapping& current(std::string* raw_line = nullptr) { if (raw_line) { *raw_line = this->raw_line; } return km; } bool at_end() { return !maps_file; } void operator++(); private: void init(bool* ok = nullptr); pid_t tid; FILE* maps_file; std::string raw_line; KernelMapping km; }; } // namespace rr #endif /* RR_ADDRESS_SPACE_H_ */ rr-5.7.0/src/AutoRemoteSyscalls.cc000066400000000000000000001021061450675474200170640ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "AutoRemoteSyscalls.h" #include #include #include #include #include #include "rr/rr.h" #include "RecordSession.h" #include "RecordTask.h" #include "ReplaySession.h" #include "Session.h" #include "Task.h" #include "core.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "util.h" using namespace std; namespace rr { /** * The ABI of the socketcall syscall is a nightmare; the first arg to * the kernel is the sub-operation, and the second argument is a * pointer to the args. The args depend on the sub-op. */ template struct socketcall_args { typename Arch::signed_long args[3]; } __attribute__((packed)); void AutoRestoreMem::init(const void* mem, ssize_t num_bytes) { ASSERT(remote.task(), remote.enable_mem_params() == AutoRemoteSyscalls::ENABLE_MEMORY_PARAMS) << "Memory parameters were disabled"; len = num_bytes; saved_sp = remote.regs().sp(); remote.regs().set_sp(remote.regs().sp() - len); remote.task()->set_regs(remote.regs()); if (remote.task()->is_exiting()) { // Leave addr == nullptr return; } addr = remote.regs().sp(); data.resize(len); bool ok = true; remote.task()->read_bytes_helper(addr, len, data.data(), &ok); if (mem) { remote.task()->write_bytes_helper(addr, len, mem, &ok); } if (!ok) { addr = nullptr; } } AutoRestoreMem::~AutoRestoreMem() { DEBUG_ASSERT(saved_sp == remote.regs().sp() + len); if (addr) { // XXX what should we do if this task was sigkilled but the address // space is used by other live tasks? remote.task()->write_bytes_helper(addr, len, data.data()); } remote.regs().set_sp(remote.regs().sp() + len); remote.task()->set_regs(remote.regs()); } static bool is_SIGTRAP_default_and_unblocked(Task* t) { if (!t->session().is_recording()) { return true; } RecordTask* rt = static_cast(t); return rt->sig_disposition(SIGTRAP) == SIGNAL_DEFAULT && !rt->is_sig_blocked(SIGTRAP); } AutoRemoteSyscalls::AutoRemoteSyscalls(Task* t, MemParamsEnabled enable_mem_params) : t(t), initial_regs(t->regs()), initial_ip(t->ip()), initial_sp(t->regs().sp()), initial_at_seccomp(t->ptrace_event() == PTRACE_EVENT_SECCOMP), restore_wait_status(t->status()), new_tid_(-1), scratch_mem_was_mapped(false), use_singlestep_path(false), enable_mem_params_(enable_mem_params), restore_sigmask(false), need_sigpending_renable(false) { if (initial_at_seccomp) { // This should only ever happen during recording - we don't use the // seccomp traps during replay. ASSERT(t, t->session().is_recording()); } // We support two paths for syscalls: // -- a fast path using a privileged untraced syscall and PTRACE_SINGLESTEP. // This only requires a single task-wait. // -- a slower path using a privileged traced syscall and PTRACE_SYSCALL/ // PTRACE_CONT via Task::enter_syscall(). This requires 2 or 3 task-waits // depending on whether the seccomp event fires before the syscall-entry // event. 
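/* [Illustrative aside, not part of rr] The slow path described above boils
 * down to the classic ptrace syscall-stop dance: PTRACE_SYSCALL runs the
 * tracee to its syscall-entry stop, a second PTRACE_SYSCALL runs it to the
 * syscall-exit stop, with a wait for each. Minimal standalone shape (error
 * handling elided; assumes <sys/ptrace.h> and <sys/wait.h>; waiting on a
 * non-leader thread may additionally need __WALL). A seccomp filter can
 * insert an extra PTRACE_EVENT_SECCOMP stop before the entry stop, which is
 * why the comment above says 2 or 3 task-waits. */
static void toy_step_one_syscall(pid_t tid) {
  int status;
  ptrace(PTRACE_SYSCALL, tid, nullptr, nullptr);  // run to syscall entry
  waitpid(tid, &status, 0);                       // task-wait #1
  ptrace(PTRACE_SYSCALL, tid, nullptr, nullptr);  // run to syscall exit
  waitpid(tid, &status, 0);                       // task-wait #2
}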
// Use the slow path when running under rr, because the rr recording us // needs to see and trace these tracee syscalls, and if they're untraced by // us they're also untraced by the outer rr. // Use the slow path if SIGTRAP is blocked or ignored because otherwise // the PTRACE_SINGLESTEP will cause the kernel to unblock it. setup_path(t->vm()->has_rr_page() && !running_under_rr() && is_SIGTRAP_default_and_unblocked(t)); if (enable_mem_params == ENABLE_MEMORY_PARAMS) { maybe_fix_stack_pointer(); } if (t->status().is_syscall() && t->regs().syscall_may_restart()) { // VERY rare corner case alert: It is possible for the following sequence // of events to occur: // // 1. Thread A is in a blocking may-restart syscall and gets interrupted by a tg-targeted signal // 2. Thread B dequeues the signal // 3. Thread A is in the syscall-exit-stop with TIF_SIGPENDING set (with registers indicating syscall restart) // 4. We get here to perform an AutoRemoteSyscall // 5. During AutoRemoteSyscall, TIF_SIGPENDING gets cleared on return to userspace // 6. We finish the AutoRemoteSyscall and re-apply the registers. // 7. ... As a result, the kernel does not check whether it needs to perform the /// syscall-restart register adjustment because TIF_SIGPENDING is not set. // 8. The -ERESTART error code leaks to userspace. // // Arguably this is a kernel bug, but it's not clear how the behavior should be changed. // // To work around this, we forcibly re-enable TIF_SIGPENDING when cleaning up // AutoRemoteSyscall (see below). need_sigpending_renable = true; } if (t->session().is_recording()) { RecordTask *rt = static_cast(t); if (rt->schedule_frozen) { // If we're explicitly controlling the schedule, make sure not to accidentally run // any signals that we were not meant to be able to see. restore_sigmask = true; sigmask_to_restore = rt->get_sigmask(); sig_set_t all_blocked; memset(&all_blocked, 0xff, sizeof(all_blocked)); // Ignore the process dying here - we'll notice later. (void)rt->set_sigmask(all_blocked); } } } void AutoRemoteSyscalls::setup_path(bool enable_singlestep_path) { #if defined(__aarch64__) // XXXkhuey this fast path doesn't work on AArch64 yet, go slow instead enable_singlestep_path = false; #endif if (!replaced_bytes.empty()) { // XXX what to do here to clean up if the task died unexpectedly? t->write_mem(remote_ptr(initial_regs.ip().to_data_ptr()), replaced_bytes.data(), replaced_bytes.size()); } remote_code_ptr syscall_ip; use_singlestep_path = enable_singlestep_path; if (use_singlestep_path) { syscall_ip = AddressSpace::rr_page_syscall_entry_point( AddressSpace::UNTRACED, AddressSpace::PRIVILEGED, AddressSpace::RECORDING_AND_REPLAY, t->arch()); } else { syscall_ip = t->vm()->traced_syscall_ip(); } initial_regs.set_ip(syscall_ip); // We need to make sure to clear any breakpoints or other alterations of // the syscall instruction we're using. Note that the tracee may have set its // own breakpoints or otherwise modified the instruction, so suspending our // own breakpoint is insufficient. 
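/* [Illustrative aside, not part of rr] The compare-then-patch idiom used just
 * below, standalone: only rewrite (and remember) the bytes if they don't
 * already hold the instruction we need, so the later restore is a no-op in
 * the common case. Names are hypothetical; assumes <vector> and <cstring>. */
static std::vector<uint8_t> toy_ensure_bytes(uint8_t* at,
                                             const std::vector<uint8_t>& want) {
  std::vector<uint8_t> old(at, at + want.size());
  if (old == want) {
    return {};                           // nothing replaced, nothing to restore
  }
  memcpy(at, want.data(), want.size());  // patch in the required instruction
  return old;                            // caller writes this back afterwards
}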
std::vector syscall = rr::syscall_instruction(t->arch()); bool ok = true; replaced_bytes = t->read_mem(initial_regs.ip().to_data_ptr(), syscall.size(), &ok); if (!ok) { // The task died return; } if (replaced_bytes == syscall) { replaced_bytes.clear(); } else { t->write_mem(initial_regs.ip().to_data_ptr(), syscall.data(), syscall.size(), &ok); } } static bool is_usable_area(const KernelMapping& km) { return (km.prot() & (PROT_READ | PROT_WRITE)) == (PROT_READ | PROT_WRITE) && (km.flags() & MAP_PRIVATE); } void AutoRemoteSyscalls::maybe_fix_stack_pointer() { if (!t->session().done_initial_exec()) { return; } remote_ptr last_stack_byte = t->regs().sp() - 1; if (t->vm()->has_mapping(last_stack_byte)) { auto m = t->vm()->mapping_of(last_stack_byte); if (is_usable_area(m.map) && m.map.start() + 2048 <= t->regs().sp()) { // 'sp' is in a stack region and there's plenty of space there. No need // to fix anything. return; } } MemoryRange found_stack; for (const auto& m : t->vm()->maps()) { if (is_usable_area(m.map)) { found_stack = m.map; break; } }; if (found_stack.start().is_null()) { AutoRemoteSyscalls remote(t, DISABLE_MEMORY_PARAMS); found_stack = MemoryRange(remote.infallible_mmap_syscall_if_alive( remote_ptr(), 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), 4096); ASSERT(t, !found_stack.start().is_null()) << "Tracee unexpectedly died here"; scratch_mem_was_mapped = true; } fixed_sp = found_stack.end(); DEBUG_ASSERT(!fixed_sp.is_null()); initial_regs.set_sp(fixed_sp); } AutoRemoteSyscalls::~AutoRemoteSyscalls() { restore_state_to(t); } void AutoRemoteSyscalls::restore_state_to(Task* t) { // Check if the task was unexpectedly killed via SIGKILL or equivalent. bool is_exiting = !t->is_stopped() || t->ptrace_event() == PTRACE_EVENT_EXIT || t->was_reaped(); // Unmap our scatch region if required if (scratch_mem_was_mapped && !is_exiting) { AutoRemoteSyscalls remote(t, DISABLE_MEMORY_PARAMS); remote.infallible_syscall(syscall_number_for_munmap(arch()), fixed_sp - 4096, 4096); } if (!replaced_bytes.empty()) { // XXX how to clean up if the task died and the address space is shared with live task? t->write_mem(remote_ptr(initial_regs.ip().to_data_ptr()), replaced_bytes.data(), replaced_bytes.size()); } auto regs = initial_regs; regs.set_ip(initial_ip); regs.set_sp(initial_sp); if (is_exiting) { // Don't restore status; callers need to see the task is exiting. // And the other stuff we don't below won't work. // But do restore registers so it looks like the exit happened in a clean state. t->set_regs(regs); return; } if (t->arch() == aarch64 && regs.syscall_may_restart()) { // On AArch64, the kernel restarts aborted syscalls using an internal `orig_x0`. // This gets overwritten everytime we make a syscall so we need to restore it // if we are at a syscall that may restart. // The kernel `orig_x0` isn't accessible from ptrace AFAICT but fortunately // it does **NOT** get reset on syscall exit so we can actually set it's value // just by making a dummy syscall with the correct x0 value. auto restart_res = regs.syscall_result(); regs.set_ip(t->vm()->traced_syscall_ip()); // This can be any side-effect-free syscall that doesn't care about arg1. // The kernel sets its `orig_x0` no matter whether the syscall actually needs it. 
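/* [Illustrative aside, not part of rr] Why a dummy syscall fixes orig_x0: the
 * kernel keeps its own copy of the first argument and reuses it when it
 * rewinds an interrupted, restartable syscall, so whatever x0 the *latest*
 * syscall entered with is what a restart will see. The restart machinery in
 * miniature (a conceptual sketch, not kernel code): */
struct ToyArm64Restart {
  uint64_t pc, x0, orig_x0;
  void restart() {
    pc -= 4;       // back up over the 4-byte svc #0 instruction
    x0 = orig_x0;  // argument 1 comes from the kernel's saved copy,
  }                // which every syscall entry overwrites
};
// Hence entering one harmless syscall with the right x0 refreshes that copy.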
    regs.set_syscallno(rr::ARM64Arch::getpid);
    regs.set_arg1(regs.orig_arg1());
    t->set_regs(regs);
    if (t->enter_syscall(true)) {
      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT,
                               RESUME_NO_TICKS)) {
        // Tracee died unexpectedly, there is nothing more we can do.
        // Do not restore the status, we want callers to see that the task died.
        return;
      }
    }
    regs.set_ip(initial_ip);
    regs.set_syscallno(regs.original_syscallno());
    regs.set_syscall_result(restart_res);
  }

  // If we were sitting at a seccomp trap, try to get back there by resuming
  // here. Since the original register contents caused a seccomp trap,
  // re-running the syscall with the same registers should put us right back
  // to this same seccomp trap.
  if (initial_at_seccomp && t->ptrace_event() != PTRACE_EVENT_SECCOMP) {
    regs.set_ip(initial_ip.decrement_by_syscall_insn_length(t->arch()));
    regs.set_syscallno(regs.original_syscallno());
    t->set_regs(regs);
    RecordTask* rt = static_cast<RecordTask*>(t);
    while (true) {
      if (!rt->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT,
                                RESUME_NO_TICKS)) {
        // Tracee died unexpectedly, there is nothing more we can do.
        // Do not restore the status, we want callers to see that the task died.
        return;
      }
      if (rt->ptrace_event()) break;
      rt->stash_sig();
    }
    ASSERT(rt, rt->ptrace_event() == PTRACE_EVENT_SECCOMP);
  } else {
    // Restore stomped registers.
    t->set_regs(regs);
  }
  t->set_status(restore_wait_status);
  if (restore_sigmask) {
    static_cast<RecordTask*>(t)->set_sigmask(sigmask_to_restore);
  }
  if (need_sigpending_renable) {
    // The purpose of this PTRACE_INTERRUPT is to re-enable TIF_SIGPENDING on
    // the tracee, without forcing any actual signals on it. Since PTRACE_INTERRUPT
    // needs to be able to interrupt re-startable system calls, it is required
    // to set TIF_SIGPENDING, but the fact that this works is of course a very
    // deep implementation detail.
    // If this fails then the tracee must be dead or no longer traced, in which
    // case we no longer care about its TIF_SIGPENDING status.
    t->do_ptrace_interrupt();
  }
}

static bool ignore_signal(Task* t) {
  int sig = t->stop_sig();
  if (!sig) {
    return false;
  }
  if (t->session().is_replaying()) {
    if (ReplaySession::is_ignored_signal(sig)) {
      return true;
    }
  } else if (t->session().is_recording()) {
    auto rt = static_cast<RecordTask*>(t);
    if (sig != rt->session().syscallbuf_desched_sig()) {
      rt->stash_sig();
    }
    return true;
  }
  siginfo_t siginfo;
  errno = 0;
  t->fallible_ptrace(PTRACE_GETSIGINFO, nullptr, &siginfo);
  if (errno) {
    ASSERT(t, false) << "Unexpected signal " << signal_name(sig);
  } else {
    ASSERT(t, false) << "Unexpected signal " << siginfo;
  }
  return false;
}

long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
  LOG(debug) << "syscall " << syscall_name(syscallno, t->arch()) << " "
             << callregs;

  if (t->is_exiting()) {
    LOG(debug) << "Task is dying, don't try anything.";
    ASSERT(t, t->stopped_or_unexpected_exit()) << "Already seen exit event";
    return -ESRCH;
  }

  if ((int)callregs.arg1() == SIGTRAP && use_singlestep_path &&
      (is_sigaction_syscall(syscallno, t->arch()) ||
       is_rt_sigaction_syscall(syscallno, t->arch()) ||
       is_signal_syscall(syscallno, t->arch()))) {
    // Don't use the fast path if we're about to set up a signal handler
    // for SIGTRAP!
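    // (The singlestep path relies on the PTRACE_SINGLESTEP SIGTRAP having its
    // default, unblocked disposition; once the tracee installs its own SIGTRAP
    // handler that assumption no longer holds, so fall back to the
    // traced-syscall path for this call.)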
LOG(debug) << "Disabling singlestep path due to SIGTRAP sigaction"; setup_path(false); callregs.set_ip(initial_regs.ip()); } callregs.set_original_syscallno(syscallno); callregs.set_syscallno(syscallno); t->set_regs(callregs); bool from_seccomp = initial_at_seccomp && t->ptrace_event() == PTRACE_EVENT_SECCOMP; if (use_singlestep_path && !from_seccomp) { while (true) { if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) { // Tracee was killed, there is nothing more we can do. ASSERT(t, t->stopped_or_unexpected_exit()) << "Couldn't singlestep"; return -ESRCH; } LOG(debug) << "Used singlestep path; status=" << t->status(); // When a PTRACE_EVENT_EXIT is returned we don't update registers if (t->ip() != callregs.ip()) { // We entered the syscall, so stop now break; } if (t->stop_sig() == SIGTRAP && t->get_siginfo().si_code == TRAP_TRACE) { // On aarch64, if we were previously in a syscall-exit stop, continuing // with PTRACE_SINGLESTEP will result in incurring a trap upon execution // of the first instruction in userspace. Ignore such a trap. continue; } if (ignore_signal(t)) { // We were interrupted by a signal before we even entered the syscall continue; } ASSERT(t, false) << "Unexpected status " << t->status(); } } else { if (from_seccomp) { LOG(debug) << "Skipping enter_syscall - already at seccomp stop"; } else { if (!t->enter_syscall(true)) { // Tracee was killed, there is nothing more we can do. // Ensure callers see the task death status. ASSERT(t, t->stopped_or_unexpected_exit()) << "couldn't enter syscall"; return -ESRCH; } LOG(debug) << "Used enter_syscall; status=" << t->status(); } if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) { // Tracee was killed, there is nothing more we can do. // Ensure callers see the task death status. ASSERT(t, t->stopped_or_unexpected_exit()) << "couldn't resume syscall"; return -ESRCH; } LOG(debug) << "syscall exit status=" << t->status(); } while (true) { if (t->status().is_syscall() || (t->stop_sig() == SIGTRAP && is_kernel_trap(t->get_siginfo().si_code))) { // If we got a SIGTRAP then we assume that's our singlestep and we're // done. break; } if (is_clone_syscall(syscallno, t->arch()) && t->clone_syscall_is_complete(&new_tid_, t->arch())) { if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) { // Tracee was killed, there is nothing more we can do. ASSERT(t, t->stopped_or_unexpected_exit()) << "Couldn't resume clone"; return -ESRCH; } LOG(debug) << "got clone event; new status=" << t->status(); continue; } if (ignore_signal(t)) { if (t->regs().syscall_may_restart()) { if (!t->enter_syscall(true)) { // Tracee was killed, there is nothing more we can do. ASSERT(t, t->stopped_or_unexpected_exit()) << "Couldn't restart"; return -ESRCH; } LOG(debug) << "signal ignored; restarting syscall, status=" << t->status(); if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) { // Tracee was killed, there is nothing more we can do. ASSERT(t, t->stopped_or_unexpected_exit()) << "Couldn't resume restart"; return -ESRCH; } LOG(debug) << "syscall exit status=" << t->status(); continue; } LOG(debug) << "signal ignored"; // We have been notified of a signal after a non-interruptible syscall // completed. Don't continue, we're done here. 
      break;
    }
    ASSERT(t, false) << "Unexpected status " << t->status();
    break;
  }

  LOG(debug) << "done, result=" << t->regs().syscall_result();
  return t->regs().syscall_result();
}

SupportedArch AutoRemoteSyscalls::arch() const { return t->arch(); }

template <typename Arch>
static void write_socketcall_args(Task* t,
                                  remote_ptr<socketcall_args<Arch>> remote_mem,
                                  typename Arch::signed_long arg1,
                                  typename Arch::signed_long arg2,
                                  typename Arch::signed_long arg3, bool* ok) {
  socketcall_args<Arch> sc_args = { { arg1, arg2, arg3 } };
  t->write_mem(remote_mem.cast<socketcall_args<Arch>>(), sc_args, ok);
}

template <typename Arch> struct fd_message {
  // Unfortunately we need to send at least one byte of data in our
  // message for it to work
  char data;
  typename Arch::iovec msgdata;
  char cmsgbuf[Arch::cmsg_space(sizeof(int))];
  typename Arch::msghdr msg;
  // XXX: Could make this conditional on Arch
  socketcall_args<Arch> socketcall;
  void init(remote_ptr<fd_message<Arch>> base) {
    data = 0;
    msgdata.iov_base = REMOTE_PTR_FIELD(base, data);
    msgdata.iov_len = 1;
    memset(&msg, 0, sizeof(msg));
    msg.msg_control = REMOTE_PTR_FIELD(base, cmsgbuf);
    msg.msg_controllen = sizeof(cmsgbuf);
    msg.msg_iov = REMOTE_PTR_FIELD(base, msgdata);
    msg.msg_iovlen = 1;
  }
  fd_message(remote_ptr<fd_message<Arch>> base) { init(base); }
  fd_message() { init((uintptr_t)this); }
  remote_ptr<fd_message<Arch>> remote_this() {
    return msgdata.iov_base.rptr().as_int();
  }
  remote_ptr<typename Arch::msghdr> remote_msg() {
    return REMOTE_PTR_FIELD(remote_this(), msg);
  }
  remote_ptr<socketcall_args<Arch>> remote_sc_args() {
    return REMOTE_PTR_FIELD(remote_this(), socketcall);
  }
  remote_ptr<int> remote_cmsgdata() {
    return REMOTE_PTR_FIELD(remote_this(), cmsgbuf).as_int() +
           (uintptr_t)Arch::cmsg_data(NULL);
  }
};

template <typename Arch>
static long child_sendmsg(AutoRemoteSyscalls& remote, int child_sock, int fd) {
  AutoRestoreMem remote_buf(remote, nullptr, sizeof(fd_message<Arch>));
  fd_message<Arch> msg(remote_buf.get().cast<fd_message<Arch>>());
  // Pull the puppet strings to have the child send its fd
  // to us. Similarly to above, we DONT_WAIT on the
  // call to finish, since it's likely not defined whether the
  // sendmsg() may block on our recvmsg()ing what the tracee
  // sent us (in which case we would deadlock with the tracee).
  // We call sendmsg on child socket, but first we have to prepare a lot of
  // data.
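  // Roughly, the tracee-side buffer we assemble is laid out as:
  //   fd_message<Arch> {
  //     data;       // the single dummy byte carried by the message
  //     msgdata;    // iovec pointing at `data`
  //     cmsgbuf;    // SCM_RIGHTS control message that will carry `fd`
  //     msg;        // msghdr tying the iovec and control buffer together
  //     socketcall; // argument block, only used on socketcall architectures
  //   }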
  auto cmsg = reinterpret_cast<typename Arch::cmsghdr*>(msg.cmsgbuf);
  cmsg->cmsg_len = Arch::cmsg_len(sizeof(fd));
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  *static_cast<int*>(Arch::cmsg_data(cmsg)) = fd;

  if (has_socketcall_syscall(Arch::arch())) {
    socketcall_args<Arch> sc_args = {
      { child_sock, (typename Arch::signed_long)msg.remote_msg().as_int(), 0 }
    };
    msg.socketcall = sc_args;
  }

  bool ok = true;
  remote.task()->write_bytes_helper(remote_buf.get().cast<void>(),
                                    sizeof(msg), &msg, &ok);
  if (!ok) {
    return -ESRCH;
  }
  if (!has_socketcall_syscall(Arch::arch())) {
    return remote.syscall(Arch::sendmsg, child_sock, msg.remote_msg(), 0);
  }
  return remote.syscall(Arch::socketcall, SYS_SENDMSG, msg.remote_sc_args());
}

template <typename Arch>
static long child_recvmsg(AutoRemoteSyscalls& remote, int child_sock) {
  AutoRestoreMem remote_buf(remote, nullptr, sizeof(fd_message<Arch>));
  fd_message<Arch> msg(remote_buf.get().cast<fd_message<Arch>>());
  bool ok = true;

  if (has_socketcall_syscall(Arch::arch())) {
    socketcall_args<Arch> sc_args = {
      { child_sock, (typename Arch::signed_long)msg.remote_msg().as_int(), 0 }
    };
    msg.socketcall = sc_args;
  }

  remote.task()->write_bytes_helper(remote_buf.get().cast<void>(),
                                    sizeof(msg), &msg, &ok);
  if (!ok) {
    ASSERT(remote.task(), errno == ESRCH)
        << "Error writing " << remote_buf.get() << " in "
        << remote.task()->tid;
    LOG(debug) << "Failed to write memory";
    return -ESRCH;
  }

  int ret = 0;
  if (has_socketcall_syscall(Arch::arch())) {
    ret = remote.syscall(Arch::socketcall, SYS_RECVMSG, msg.remote_sc_args());
  } else {
    ret = remote.syscall(Arch::recvmsg, child_sock, msg.remote_msg(), 0);
  }
  if (ret < 0) {
    LOG(debug) << "Failed to recvmsg " << ret;
    return ret;
  }

  int their_fd = remote.task()->read_mem(msg.remote_cmsgdata(), &ok);
  if (!ok) {
    ASSERT(remote.task(), errno == ESRCH);
    LOG(debug) << "Failed to read msg";
    return -ESRCH;
  }
  return their_fd;
}

#define MAX_FDS_READ 2

// Try to read a single-character message from `sock`. Will collect
// up to MAX_FDS_READ fds in an SCM_RIGHTS control message and return those
// fds. Returns an empty vector if reading the message fails.
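// On success the returned ScopedFds own the received descriptors; they are
// received with MSG_CMSG_CLOEXEC so they won't leak into tracee execs.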
static vector<ScopedFd> maybe_receive_fds(ScopedFd& sock,
                                          bool blocking = true) {
  vector<ScopedFd> ret;
  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  char ch;
  struct iovec iov = { &ch, 1 };
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  char cmsgbuf[CMSG_SPACE(MAX_FDS_READ * sizeof(int))];
  msg.msg_control = cmsgbuf;
  msg.msg_controllen = sizeof(cmsgbuf);
  int flags = MSG_CMSG_CLOEXEC;
  if (!blocking) {
    flags |= MSG_DONTWAIT;
  }
  if (recvmsg(sock, &msg, flags) < 0) {
    return ret;
  }

  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
  if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
      cmsg->cmsg_type != SCM_RIGHTS) {
    FATAL() << "Invalid cmsg";
  }

  int num_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
  for (int i = 0; i < num_fds; i++) {
    int fd;
    memcpy(&fd, CMSG_DATA(cmsg) + i * sizeof(int), sizeof(int));
    DEBUG_ASSERT(fd >= 0);
    ret.push_back(ScopedFd(fd));
  }
  return ret;
}

static void sendmsg_socket(ScopedFd& sock, int fd_to_send) {
  fd_message<NativeArch> msg;

  struct msghdr* msgp = (struct msghdr*)&msg.msg;
  struct cmsghdr* cmsg = CMSG_FIRSTHDR(msgp);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(fd_to_send));
  *(int*)CMSG_DATA(cmsg) = fd_to_send;

  if (0 > sendmsg(sock, msgp, 0)) {
    FATAL() << "Failed to send fd";
  }
}

static Task* thread_group_leader_for_fds(Task* t) {
  for (Task* tt : t->fd_table()->task_set()) {
    if (tt->tgid() == tt->rec_tid && !tt->seen_ptrace_exit_event()) {
      return tt;
    }
  }
  return nullptr;
}

template <typename Arch>
ScopedFd AutoRemoteSyscalls::retrieve_fd_arch(int fd) {
  ScopedFd ret;
  if (!pid_fd.is_open()) {
    // Try to use pidfd_getfd to get the fd without round-tripping to the tracee.
    // pidfd_getfd requires a threadgroup leader, so find one if we can.
    Task* tg_leader_for_fds = thread_group_leader_for_fds(t);
    if (tg_leader_for_fds) {
      pid_fd = ScopedFd(
          ::syscall(NativeArch::pidfd_open, tg_leader_for_fds->tid, 0));
      ASSERT(t, pid_fd.is_open() || errno == ENOSYS)
          << "Error in pidfd_open errno=" << errno_name(errno);
    }
  }
  if (pid_fd.is_open()) {
    ret = ScopedFd(::syscall(NativeArch::pidfd_getfd, pid_fd.get(), fd, 0));
    if (ret.is_open()) {
      return ret;
    }
    ASSERT(t, errno == ENOSYS)
        << "Failed in pidfd_getfd errno=" << errno_name(errno);
  }

  // Clear out any pending message in the socket.
  maybe_receive_fds(task()->session().tracee_socket_receiver_fd(), false);

  long child_syscall_result =
      child_sendmsg<Arch>(*this, task()->session().tracee_fd_number(), fd);
  if (child_syscall_result == -ESRCH) {
    return ret;
  }
  ASSERT(t, child_syscall_result > 0)
      << "Failed to sendmsg() in tracee; err="
      << errno_name(-child_syscall_result);

  vector<ScopedFd> fds =
      maybe_receive_fds(task()->session().tracee_socket_fd());
  ASSERT(t, !fds.empty()) << "Failed to receive fd";
  ASSERT(t, fds.size() == 1);
  return std::move(fds[0]);
}

ScopedFd AutoRemoteSyscalls::retrieve_fd(int fd) {
  RR_ARCH_FUNCTION(retrieve_fd_arch, arch(), fd);
}

template <typename Arch>
int AutoRemoteSyscalls::send_fd_arch(const ScopedFd& our_fd) {
  if (!our_fd.is_open()) {
    return -EBADF;
  }
  // Clear out any pending message from the socket.
  maybe_receive_fds(task()->session().tracee_socket_receiver_fd(), false);

  LOG(debug) << "Sending fd " << our_fd.get() << " via socket fd "
             << task()->session().tracee_socket_fd().get();
  sendmsg_socket(task()->session().tracee_socket_fd(), our_fd.get());

  long child_syscall_result =
      child_recvmsg<Arch>(*this, task()->session().tracee_fd_number());
  // If the child died before reading the message from the socket,
  // the message will still be in the socket buffer and will be received
  // the next time we try to send something to a tracee.
  // That's why before using tracee_socket_receiver_fd we need to drain up to
  // one message from it.
  ASSERT(t, child_syscall_result >= 0 || child_syscall_result == -ESRCH)
      << "Failed to recvmsg() in tracee; err="
      << errno_name(-child_syscall_result);
  return child_syscall_result;
}

int AutoRemoteSyscalls::send_fd(const ScopedFd& our_fd) {
  RR_ARCH_FUNCTION(send_fd_arch, arch(), our_fd);
}

void AutoRemoteSyscalls::infallible_close_syscall_if_alive(int child_fd) {
  infallible_syscall_if_alive(syscall_number_for_close(arch()), child_fd);
}

int AutoRemoteSyscalls::infallible_send_fd_if_alive(const ScopedFd& our_fd) {
  int child_fd = send_fd(our_fd);
  ASSERT(t,
         child_fd >= 0 || (child_fd == -ESRCH && !t->session().is_replaying()))
      << "Failed to send fd; err=" << errno_name(-child_fd);
  return child_fd;
}

void AutoRemoteSyscalls::infallible_send_fd_dup(const ScopedFd& our_fd,
                                                int dup_to, int dup3_flags) {
  int remote_fd = infallible_send_fd_if_alive(our_fd);
  ASSERT(t, remote_fd >= 0);
  if (remote_fd != dup_to) {
    long ret = infallible_syscall(syscall_number_for_dup3(arch()), remote_fd,
                                  dup_to, dup3_flags);
    ASSERT(task(), ret == dup_to);
    infallible_close_syscall_if_alive(remote_fd);
  }
}

remote_ptr<void> AutoRemoteSyscalls::infallible_mmap_syscall_if_alive(
    remote_ptr<void> addr, size_t length, int prot, int flags, int child_fd,
    uint64_t offset_bytes) {
  ASSERT(t, offset_bytes % page_size() == 0)
      << "mmap offset (" << offset_bytes << ") must be multiple of page size ("
      << page_size() << ")";
  // The first syscall argument is called "arg 1", so
  // our syscall-arg-index template parameter starts
  // with "1".
  remote_ptr<void> ret =
      has_mmap2_syscall(arch())
          ? infallible_syscall_ptr_if_alive(syscall_number_for_mmap2(arch()),
                                            addr, length, prot, flags,
                                            child_fd,
                                            (off_t)offset_bytes / 4096)
          : infallible_syscall_ptr_if_alive(syscall_number_for_mmap(arch()),
                                            addr, length, prot, flags,
                                            child_fd, offset_bytes);
  if (flags & MAP_FIXED) {
    if (ret) {
      ASSERT(t, addr == ret) << "MAP_FIXED at " << addr << " but got " << ret;
    } else {
      if (!t->vm()->has_mapping(addr)) {
        KernelMapping km = t->vm()->read_kernel_mapping(t, addr);
        if (km.size()) {
          ASSERT(t, km.start() == addr && km.size() == ceil_page_size(length));
          // The mapping was created. Pretend this call succeeded.
          ret = addr;
        }
      }
    }
  }
  return ret;
}

bool AutoRemoteSyscalls::infallible_munmap_syscall_if_alive(
    remote_ptr<void> addr, size_t length) {
  long ret = infallible_syscall_if_alive(syscall_number_for_munmap(arch()),
                                         addr, length);
  if (ret) {
    if (t->vm()->has_mapping(addr)) {
      KernelMapping km = t->vm()->read_kernel_mapping(t, addr);
      if (!km.size()) {
        // The unmap happened but the task must have died before
        // reporting the status.
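        // Treat the syscall as having succeeded, so the caller (and the
        // AddressSpace bookkeeping) see the unmap that really happened.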
        ret = 0;
      }
    }
  }
  return !ret;
}

int64_t AutoRemoteSyscalls::infallible_lseek_syscall(int fd, int64_t offset,
                                                     int whence) {
  switch (arch()) {
    case x86: {
      AutoRestoreMem mem(*this, &offset, sizeof(int64_t));
      infallible_syscall(syscall_number_for__llseek(arch()), fd, offset >> 32,
                         offset, mem.get(), whence);
      return t->read_mem(mem.get().cast<int64_t>());
    }
    case x86_64:
    case aarch64:
      return infallible_syscall(syscall_number_for_lseek(arch()), fd, offset,
                                whence);
    default:
      ASSERT(t, false) << "Unknown arch";
      return -1;
  }
}

void AutoRemoteSyscalls::check_syscall_result(long ret, int syscallno,
                                              bool allow_death) {
  if (word_size(t->arch()) == 4) {
    // Sign-extend ret because it can be a 32-bit negative errno
    ret = (int)ret;
  }
  if (ret == -ESRCH && allow_death && !t->session().is_replaying()) {
    return;
  }
  if (-4096 < ret && ret < 0) {
    string extra_msg;
    if (is_open_syscall(syscallno, arch())) {
      extra_msg = " opening " + t->read_c_str(t->regs().arg1());
    } else if (is_openat_syscall(syscallno, arch())) {
      extra_msg = " opening " + t->read_c_str(t->regs().arg2());
    } else if (is_mremap_syscall(syscallno, arch()) ||
               is_mmap_syscall(syscallno, arch())) {
      AddressSpace::print_process_maps(t);
    }
    ASSERT(t, false) << "Syscall " << syscall_name(syscallno, arch())
                     << " failed with errno " << errno_name(-ret) << extra_msg
                     << " arg1=0x" << hex << t->regs().arg1() << " arg2=0x"
                     << t->regs().arg2() << " arg3=0x" << hex
                     << t->regs().arg3() << " arg4=0x" << t->regs().arg4()
                     << " arg5=0x" << hex << t->regs().arg5() << " arg6=0x"
                     << t->regs().arg6();
  }
}

void AutoRemoteSyscalls::finish_direct_mmap(
    remote_ptr<void> rec_addr, size_t length, int prot, int flags,
    const string& backing_file_name, int backing_file_open_flags,
    off64_t backing_offset_bytes, struct stat& real_file,
    string& real_file_name) {
  int fd;

  LOG(debug) << "directly mmap'ing " << length << " bytes of "
             << backing_file_name << " at offset "
             << HEX(backing_offset_bytes);

  ASSERT(task(), !(flags & MAP_GROWSDOWN));

  /* Open in the tracee the file that was mapped during
   * recording. */
  {
    AutoRestoreMem child_str(*this, backing_file_name.c_str());
    if (word_size(t->arch()) == 4) {
      backing_file_open_flags |= RR_LARGEFILE_32;
    }
    fd = infallible_syscall(syscall_number_for_openat(arch()), -1,
                            child_str.get().as_int(),
                            backing_file_open_flags);
  }
  /* And mmap that file. */
  infallible_mmap_syscall_if_alive(rec_addr, length,
                                   /* (We let SHARED|WRITEABLE
                                    * mappings go through while
                                    * they're not handled properly,
                                    * but we shouldn't do that.) */
                                   prot,
                                   (flags & ~MAP_SYNC) | MAP_FIXED, fd,
                                   /* MAP_SYNC is used to request direct mapping
                                    * (DAX) from the filesystem for persistent
                                    * memory devices (requires
                                    * MAP_SHARED_VALIDATE). Drop it for the
                                    * backing file. */
                                   backing_offset_bytes);

  // While it's open, grab the link reference.
  real_file = task()->stat_fd(fd);
  real_file_name = task()->file_name_of_fd(fd);

  /* Don't leak the tmp fd. The mmap doesn't need the fd to
   * stay open. */
  infallible_close_syscall_if_alive(fd);
}

} // namespace rr
rr-5.7.0/src/AutoRemoteSyscalls.h000066400000000000000000000262321450675474200167330ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_AUTO_REMOTE_SYSCALLS_H_
#define RR_AUTO_REMOTE_SYSCALLS_H_

#include <string.h>

#include <vector>

#include "Registers.h"
#include "ScopedFd.h"
#include "Task.h"

namespace rr {

class AutoRemoteSyscalls;
class Task;

/**
 * Helpers to make remote syscalls on behalf of a Task. Usage looks
Usage looks * like * * AutoRemoteSyscalls remote(t); // prepare remote syscalls * remote.syscall(syscall_number_for_open(remote.arch()), ...); // make *syscalls * ... * // when |remote| goes out of scope, remote syscalls are finished */ /** * Cookie used to restore stomped memory, usually prepared as the * argument to a remote syscall. */ class AutoRestoreMem { public: /** * Write |mem| into address space of the Task prepared for * remote syscalls in |remote|, in such a way that the write * will be undone. The address of the reserved mem space is * available via |get|. * If |mem| is null, data is not written, only the space is reserved. */ AutoRestoreMem(AutoRemoteSyscalls& remote, const void* mem, ssize_t num_bytes) : remote(remote) { init(mem, num_bytes); } /** * Convenience constructor for pushing a C string |str|, including * the trailing '\0' byte. */ AutoRestoreMem(AutoRemoteSyscalls& remote, const char* str) : remote(remote) { init((const uint8_t*)str, strlen(str) + 1 /*null byte*/); } ~AutoRestoreMem(); /** * Get a pointer to the reserved memory. * Returns null if we failed. */ remote_ptr get() const { return addr; } /** * Return size of reserved memory buffer. */ size_t size() const { return data.size(); } private: void init(const void* mem, ssize_t num_bytes); AutoRemoteSyscalls& remote; /* Address of tmp mem. */ remote_ptr addr; /* Saved data. */ std::vector data; /* (We keep this around for error checking.) */ remote_ptr saved_sp; /* Length of tmp mem. */ size_t len; AutoRestoreMem& operator=(const AutoRestoreMem&) = delete; AutoRestoreMem(const AutoRestoreMem&) = delete; void* operator new(size_t) = delete; void operator delete(void*) = delete; }; /** * RAII helper to prepare a Task for remote syscalls and undo any * preparation upon going out of scope. Note that this restores register * values when going out of scope, so *all* changes to Task's register * state are lost. */ class AutoRemoteSyscalls { public: enum MemParamsEnabled { ENABLE_MEMORY_PARAMS, DISABLE_MEMORY_PARAMS }; /** * Prepare |t| for a series of remote syscalls. * * NBBB! Before preparing for a series of remote syscalls, * the caller *must* ensure the callee will not receive any * signals. This code does not attempt to deal with signals. */ AutoRemoteSyscalls(Task* t, MemParamsEnabled enable_mem_params = ENABLE_MEMORY_PARAMS); /** * Undo in |t| any preparations that were made for a series of * remote syscalls. */ ~AutoRemoteSyscalls(); /** * If t's stack pointer doesn't look valid, temporarily adjust it to * the top of *some* stack area. */ void maybe_fix_stack_pointer(); /** * "Initial" registers saved from the target task. * * NB: a non-const reference is returned because some power * users want to update the registers that are restored after * finishing remote syscalls. Perhaps these users should be * fixed, or you should just be careful. */ Registers& regs() { return initial_regs; } /** * Undo any preparations to make remote syscalls in the context of |t|. * * This is usually called automatically by the destructor; * don't call it directly unless you really know what you'd * doing. *ESPECIALLY* don't call this on a |t| other than * the one passed to the constructor, unless you really know * what you're doing. */ void restore_state_to(Task* t); /** * Make |syscallno| with variadic |args| (limited to 6 on * x86). Return the raw kernel return value. * Returns -ESRCH if the process dies or has died. */ template long syscall(int syscallno, Rest... 
  long syscall(int syscallno, Rest... args) {
    Registers callregs = regs();
    // The first syscall argument is called "arg 1", so
    // our syscall-arg-index template parameter starts
    // with "1".
    return syscall_helper<1>(syscallno, callregs, args...);
  }

  // Aborts on all errors.
  // DEPRECATED. Use infallible_syscall_if_alive instead.
  template <typename... Rest>
  long infallible_syscall(int syscallno, Rest... args) {
    Registers callregs = regs();
    // The first syscall argument is called "arg 1", so
    // our syscall-arg-index template parameter starts
    // with "1".
    long ret = syscall_helper<1>(syscallno, callregs, args...);
    check_syscall_result(ret, syscallno, false);
    return ret;
  }

  // Aborts on all errors other than -ESRCH. Aborts on -ESRCH
  // if this is a replay task (they should never unexpectedly die).
  template <typename... Rest>
  long infallible_syscall_if_alive(int syscallno, Rest... args) {
    Registers callregs = regs();
    // The first syscall argument is called "arg 1", so
    // our syscall-arg-index template parameter starts
    // with "1".
    long ret = syscall_helper<1>(syscallno, callregs, args...);
    check_syscall_result(ret, syscallno);
    return ret;
  }

  /** Returns null if the tracee is dead */
  template <typename... Rest>
  remote_ptr<void> infallible_syscall_ptr_if_alive(int syscallno,
                                                   Rest... args) {
    Registers callregs = regs();
    long ret = syscall_helper<1>(syscallno, callregs, args...);
    check_syscall_result(ret, syscallno);
    return ret == -ESRCH ? 0 : ret;
  }

  /**
   * Remote mmap syscalls are common and non-trivial due to the need to
   * select either mmap2 or mmap.
   * Returns null if the process died (or was already dead) without
   * creating the map. "Dead" includes reaching PTRACE_EVENT_EXIT.
   * If the mapping is FIXED and no mapping currently exists at the address,
   * and the syscall creates the mapping but the process dies before returning
   * success, we fix the result to indicate that the syscall succeeded.
   * Creating FIXED mappings in areas free according to AddressSpace is the only
   * reliable way to keep AddressSpace in sync with reality in the event of
   * unexpected process death racing with this operation.
   */
  remote_ptr<void> infallible_mmap_syscall_if_alive(remote_ptr<void> addr,
                                                    size_t length, int prot,
                                                    int flags, int child_fd,
                                                    uint64_t offset_bytes);

  /**
   * Returns false if the process died (or was already dead) without
   * unmapping the area. "Dead" includes reaching PTRACE_EVENT_EXIT.
   * If a mapping currently exists at the address, and the syscall unmaps
   * the mapping but the process dies before returning success, we fix
   * the result to indicate that the syscall succeeded.
   */
  bool infallible_munmap_syscall_if_alive(remote_ptr<void> addr,
                                          size_t length);

  /** TODO replace with infallible_lseek_syscall_if_alive */
  int64_t infallible_lseek_syscall(int fd, int64_t offset, int whence);

  /** Close the fd in the child. If the child died, just ignore that. */
  void infallible_close_syscall_if_alive(int child_fd);

  /** The Task in the context of which we're making syscalls. */
  Task* task() const { return t; }

  /**
   * A small helper to get at the Task's arch.
   * Out-of-line to avoid including Task.h here.
   */
  SupportedArch arch() const;

  /**
   * Arranges for 'fd' to be transmitted to this process and returns
   * our opened version of it.
   * Returns a closed fd if the process dies or has died.
   */
  ScopedFd retrieve_fd(int fd);

  /**
   * Arranges for 'fd' to be transmitted to the tracee and returns
   * a file descriptor in the tracee that corresponds to the same file
   * description.
   * Returns a negative value if this fails.
   */
  int send_fd(const ScopedFd& fd);

  /**
   * Arranges for 'fd' to be transmitted to the tracee and returns
   * a file descriptor in the tracee that corresponds to the same file
   * description.
   * Aborts if that fails.
   * Returns -ESRCH if the tracee is dead (and is not replaying)
   */
  int infallible_send_fd_if_alive(const ScopedFd& our_fd);

  /**
   * `send_fd` the given file descriptor, making sure that it ends up as fd
   * `dup_to`, (dup'ing it there and closing the original if necessary)
   * TODO replace with infallible_send_fd_dup_if_alive
   */
  void infallible_send_fd_dup(const ScopedFd& our_fd, int dup_to,
                              int dup3_flags);

  /**
   * Remotely invoke in |t| the specified syscall with the given
   * arguments. The arguments must of course be valid in |t|,
   * and no checking of that is done by this function.
   *
   * The syscall is finished in |t| and the result is returned.
   */
  long syscall_base(int syscallno, Registers& callregs);

  MemParamsEnabled enable_mem_params() { return enable_mem_params_; }

  /**
   * When the syscall is 'clone', this will be recovered from the
   * PTRACE_EVENT_FORK/VFORK/CLONE.
   */
  pid_t new_tid() { return new_tid_; }

  /* Do the open/mmap/close dance for a particular file */
  void finish_direct_mmap(remote_ptr<void> rec_addr, size_t length, int prot,
                          int flags, const std::string& backing_file_name,
                          int backing_file_open_flags,
                          off64_t backing_offset_bytes, struct stat& real_file,
                          std::string& real_file_name);

  // Calling this with allow_death false is DEPRECATED.
  void check_syscall_result(long ret, int syscallno, bool allow_death = true);

private:
  void setup_path(bool enable_singlestep_path);

  /**
   * "Recursively" build the set of syscall registers in
   * |callregs|. |Index| is the syscall arg that will be set to
   * |arg|, and |args| are the remaining arguments.
   */
  template <int Index, typename T, typename... Rest>
  long syscall_helper(int syscallno, Registers& callregs, T arg,
                      Rest... args) {
    callregs.set_arg<Index>(arg);
    return syscall_helper<Index + 1>(syscallno, callregs, args...);
  }
  /**
   * "Recursion" "base case": no more arguments to build, so
   * just make the syscall and return the kernel return value.
   */
  template <int Index>
  long syscall_helper(int syscallno, Registers& callregs) {
    return syscall_base(syscallno, callregs);
  }

  template <typename Arch> ScopedFd retrieve_fd_arch(int fd);
  template <typename Arch> int send_fd_arch(const ScopedFd& fd);

  Task* t;
  Registers initial_regs;
  remote_code_ptr initial_ip;
  remote_ptr<void> initial_sp;
  bool initial_at_seccomp;
  remote_ptr<void> fixed_sp;
  std::vector<uint8_t> replaced_bytes;
  WaitStatus restore_wait_status;

  ScopedFd pid_fd;

  pid_t new_tid_;

  /* Whether we had to mmap a scratch region because none was found */
  bool scratch_mem_was_mapped;
  bool use_singlestep_path;
  MemParamsEnabled enable_mem_params_;

  bool restore_sigmask;
  sig_set_t sigmask_to_restore;

  bool need_sigpending_renable;

  AutoRemoteSyscalls& operator=(const AutoRemoteSyscalls&) = delete;
  AutoRemoteSyscalls(const AutoRemoteSyscalls&) = delete;
};

} // namespace rr

#endif // RR_AUTO_REMOTE_SYSCALLS_H_
rr-5.7.0/src/BpfMapMonitor.h000066400000000000000000000012711450675474200156420ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_BPF_MAP_MONITOR_H_
#define RR_BPF_MAP_MONITOR_H_

#include "FileMonitor.h"

namespace rr {

/**
 * A FileMonitor attached to BPF map fds to record their key and value size.
 */
class BpfMapMonitor : public FileMonitor {
public:
  BpfMapMonitor(uint64_t key_size, uint64_t value_size)
      : key_size_(key_size), value_size_(value_size) {}

  virtual Type type() override { return BpfMap; }

  uint64_t key_size() const { return key_size_; }
  uint64_t value_size() const { return value_size_; }

private:
  uint64_t key_size_;
  uint64_t value_size_;
};

} // namespace rr

#endif /* RR_BPF_MAP_MONITOR_H_ */
rr-5.7.0/src/BreakpointCondition.h000066400000000000000000000005461450675474200170760ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_BREAKPOINT_CONDITION_H_
#define RR_BREAKPOINT_CONDITION_H_

namespace rr {

class Task;

class BreakpointCondition {
public:
  virtual ~BreakpointCondition() {}
  virtual bool evaluate(Task* t) const = 0;
};

} // namespace rr

#endif // RR_BREAKPOINT_CONDITION_H_
rr-5.7.0/src/BuildidCommand.cc000066400000000000000000000022761450675474200161430ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include <iostream>

#include "log.h"
#include "Command.h"
#include "ElfReader.h"
#include "ScopedFd.h"

using namespace std;

namespace rr {

class BuildidCommand : public Command {
public:
  virtual int run(vector<string>& args) override;

protected:
  BuildidCommand(const char* name, const char* help) : Command(name, help) {}

  static BuildidCommand singleton;
};

BuildidCommand BuildidCommand::singleton(
    "buildid",
    " rr buildid\n"
    "  Accepts paths on stdin, prints buildids on stdout. Will terminate when\n"
    "  either an empty line or an invalid path is provided.\n");

int BuildidCommand::run(vector<string>& args) {
  if (!args.empty()) {
    fprintf(stderr, "Unexpected arguments!");
    return 1;
  }

  string input;
  while (getline(cin, input)) {
    if (input.empty()) {
      break;
    }
    ScopedFd fd = ScopedFd(input.c_str(), O_RDONLY, 0);
    if (!fd.is_open()) {
      LOG(error) << "Failed to open `" << input << "`";
      return 1;
    }
    ElfFileReader reader(fd);
    auto buildid = reader.read_buildid();
    fprintf(stdout, "%s\n", buildid.c_str());
  }
  return 0;
}

} // namespace rr
rr-5.7.0/src/CPUFeaturesCommand.cc000066400000000000000000000027131450675474200167120ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "Command.h"
#include "GdbServer.h"
#include "main.h"
#include "util.h"

using namespace std;

namespace rr {

class CPUFeaturesCommand : public Command {
public:
  virtual int run(vector<string>& args) override;

protected:
  CPUFeaturesCommand(const char* name, const char* help)
      : Command(name, help) {}

  static CPUFeaturesCommand singleton;
};

CPUFeaturesCommand CPUFeaturesCommand::singleton(
    "cpufeatures",
    " rr cpufeatures\n"
    "  Print `rr record` command line options that will limit the tracee\n"
    "  to CPU features this machine supports.\n"
    "  Useful for trace portability: run `rr cpufeatures` on the machine\n"
    "  you plan to replay on, then add those command-line parameters to\n"
    "  `rr record` on the recording machine.\n");

int CPUFeaturesCommand::run(vector<string>& args) {
  while (parse_global_option(args)) {
  }
  CPUIDData features = cpuid(CPUID_GETFEATURES, 0);
  CPUIDData extended_features = cpuid(CPUID_GETEXTENDEDFEATURES, 0);
  CPUIDData features_xsave = cpuid(CPUID_GETXSAVE, 1);
  fprintf(stdout,
          "--disable-cpuid-features 0x%x,0x%x "
          "--disable-cpuid-features-ext 0x%x,0x%x,0x%x "
          "--disable-cpuid-features-xsave 0x%x\n",
          ~features.ecx, ~features.edx, ~extended_features.ebx,
          ~extended_features.ecx, ~extended_features.edx,
          ~features_xsave.eax);
  return 0;
}

} // namespace rr
rr-5.7.0/src/CPUIDBugDetector.cc000066400000000000000000000050061450675474200162570ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "CPUIDBugDetector.h"

#include "Event.h"
#include "Flags.h"
#include "ReplaySession.h"
#include "ReplayTask.h"
#include "kernel_abi.h"

using namespace std;

namespace rr {

#if defined(__i386__) || defined(__x86_64__)

extern "C" int cpuid_loop(int iterations);

void CPUIDBugDetector::run_detection_code() {
  // Call cpuid_loop to generate trace data we can use to detect
  // the cpuid rcb undercount bug. This generates 4 geteuid
  // calls which should have 2 rcbs between each of the
  // 3 consecutive pairs.
  cpuid_loop(4);
}

#else

// Other platforms don't have cpuid, but keep the calling code clean, by
// just making this a no-op there.
void CPUIDBugDetector::run_detection_code() {}

#endif

static bool rcb_counts_ok(ReplayTask* t, uint64_t prev, uint64_t current) {
  uint32_t expected_count = 2 + PerfCounters::ticks_for_direct_call(t);
  if (current - prev == expected_count) {
    return true;
  }
  if (!Flags::get().suppress_environment_warnings) {
    fprintf(
        stderr,
        "\n"
        "rr: Warning: You appear to be running in a VMWare guest with a bug\n"
        "    where a conditional branch instruction between two CPUID "
        "instructions\n"
        "    sometimes fails to be counted by the conditional branch "
        "performance\n"
        "    counter. Work around this problem by adding\n"
        "        monitor_control.disable_hvsim_clusters = true\n"
        "    to your .vmx file.\n"
        "\n");
  }
  return false;
}

void CPUIDBugDetector::notify_reached_syscall_during_replay(ReplayTask* t) {
  // We only care about events that happen before the first exec,
  // when our detection code runs.
  if (!is_x86ish(t->arch())) {
    return;
  }
  if (t->session().done_initial_exec()) {
    return;
  }
  const Event& ev = t->current_trace_frame().event();
  if (!is_geteuid32_syscall(ev.Syscall().number, t->arch()) &&
      !is_geteuid_syscall(ev.Syscall().number, t->arch())) {
    return;
  }
  uint64_t trace_rcb_count = t->current_trace_frame().ticks();
  uint64_t actual_rcb_count = t->tick_count();
  if (trace_rcb_count_at_last_geteuid32 > 0 && !detected_cpuid_bug) {
    if (!rcb_counts_ok(t, trace_rcb_count_at_last_geteuid32,
                       trace_rcb_count) ||
        !rcb_counts_ok(t, actual_rcb_count_at_last_geteuid32,
                       actual_rcb_count)) {
      detected_cpuid_bug = true;
    }
  }
  trace_rcb_count_at_last_geteuid32 = trace_rcb_count;
  actual_rcb_count_at_last_geteuid32 = actual_rcb_count;
}

} // namespace rr
rr-5.7.0/src/CPUIDBugDetector.h000066400000000000000000000025231450675474200161220ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_CPUID_BUG_DETECTOR_H_
#define RR_CPUID_BUG_DETECTOR_H_

#include <stdint.h>

namespace rr {

class ReplayTask;

/**
 * Helper to detect when the "CPUID can cause rcbs to be lost" bug is present.
 * See http://robert.ocallahan.org/2014/09/vmware-cpuid-conditional-branch.html
 *
 * This bug is caused by VMM optimizations described in
 * https://www.usenix.org/system/files/conference/atc12/atc12-final158.pdf
 * that cause instruction sequences related to CPUID to be optimized,
 * eliminating the user-space execution of a conditional branch between two
 * CPUID instructions (in some circumstances).
 */
class CPUIDBugDetector {
public:
  CPUIDBugDetector()
      : trace_rcb_count_at_last_geteuid32(0),
        actual_rcb_count_at_last_geteuid32(0),
        detected_cpuid_bug(false) {}

  /**
   * Call this in the context of the first spawned process to run the
   * code that triggers the bug.
   */
  static void run_detection_code();

  /**
   * Call this when task t enters a traced syscall during replay.
   */
  void notify_reached_syscall_during_replay(ReplayTask* t);

private:
  uint64_t trace_rcb_count_at_last_geteuid32;
  uint64_t actual_rcb_count_at_last_geteuid32;
  bool detected_cpuid_bug;
};

} // namespace rr

#endif /* RR_CPUID_BUG_DETECTOR_H_ */
rr-5.7.0/src/Command.cc000066400000000000000000000116311450675474200146420ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#define _BSD_SOURCE

#include "Command.h"

#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include "TraceStream.h"
#include "core.h"
#include "main.h"

using namespace std;

namespace rr {

bool ParsedOption::verify_valid_int(int64_t min, int64_t max) const {
  if (int_value < min || int_value > max) {
    fprintf(
        stderr,
        "Value %s for parameter %s was not valid (allowed range %lld-%lld)\n",
        value.c_str(), arg.c_str(), (long long)min, (long long)max);
    return false;
  }
  return true;
}

static vector<Command*>* command_list;

Command::Command(const char* name, const char* help) : name(name), help(help) {
  if (!command_list) {
    command_list = new vector<Command*>();
  }
  command_list->push_back(this);
}

Command* Command::command_for_name(const std::string& name) {
  for (auto& it : *command_list) {
    if (strcmp(it->name, name.c_str()) == 0) {
      return it;
    }
  }
  return nullptr;
}

bool Command::less_than_by_name(Command* c1, Command* c2) {
  return strcmp(c1->name, c2->name) < 0;
}

void Command::print_help_all(FILE* out) {
  vector<Command*> cmds;
  for (auto& it : *command_list) {
    if (!it->help) {
      continue;
    }
    cmds.push_back(it);
  }
  sort(cmds.begin(), cmds.end(), less_than_by_name);
  for (auto& it : cmds) {
    const char* c = strchr(it->help, '\n');
    if (c) {
      fprintf(out, "%.*s\n", (int)(c - it->help), it->help);
    } else {
      fputs(it->help, out);
    }
  }
}

void Command::print_help(FILE* out) {
  if (help) {
    fputs(help, out);
    print_global_options(out);
  } else {
    print_usage(out);
  }
}

static bool consume_args(std::vector<std::string>& args, size_t count) {
  args.erase(args.begin(), args.begin() + count);
  return true;
}

static void assign_param(ParsedOption* opt, const char* s) {
  opt->value = s;
  opt->int_value = INT64_MIN;
  if (!opt->value.empty()) {
    char* end;
    int64_t v = strtoll(s, &end, 0);
    if (*end == 0) {
      opt->int_value = v;
    }
  }
}

bool Command::parse_option(std::vector<std::string>& args,
                           const OptionSpec* option_specs, size_t count,
                           ParsedOption* out) {
  if (args.size() == 0 || args[0][0] != '-') {
    return false;
  }

  out->arg = args[0];

  for (size_t i = 0; i < count; ++i) {
    if (args[0][1] == option_specs[i].short_name && args[0][1] >= 32) {
      out->short_name = option_specs[i].short_name;
      switch (option_specs[i].param) {
        case NO_PARAMETER:
          if (args[0][2] == 0) {
            return consume_args(args, 1);
          }
          return false;
        case HAS_PARAMETER:
          if (args[0][2] == '=') {
            assign_param(out, args[0].c_str() + 3);
            return consume_args(args, 1);
          }
          if (args[0][2] != 0) {
            assign_param(out, args[0].c_str() + 2);
            return consume_args(args, 1);
          }
          if (args.size() >= 2) {
            assign_param(out, args[1].c_str());
            return consume_args(args, 2);
          }
          return false;
        default:
          DEBUG_ASSERT(0 && "Unknown parameter type");
      }
    } else if (args[0][1] == '-') {
      size_t equals = args[0].find('=');
      size_t cmp_len = (equals == string::npos ? 9999 : equals) - 2;
      if (cmp_len >= strlen(option_specs[i].long_name) &&
          strncmp(args[0].c_str() + 2, option_specs[i].long_name, cmp_len) ==
              0) {
        out->short_name = option_specs[i].short_name;
        switch (option_specs[i].param) {
          case NO_PARAMETER:
            return consume_args(args, 1);
          case HAS_PARAMETER:
            if (equals == string::npos) {
              if (args.size() >= 2) {
                assign_param(out, args[1].c_str());
                return consume_args(args, 2);
              }
              return false;
            }
            assign_param(out, args[0].c_str() + equals + 1);
            return consume_args(args, 1);
          default:
            DEBUG_ASSERT(0 && "Unknown parameter type");
        }
      }
    }
  }
  return false;
}

bool Command::verify_not_option(std::vector<std::string>& args) {
  if (args.size() > 0 && args[0][0] == '-') {
    if (args[0].length() == 2 && args[0][1] == '-') {
      args.erase(args.begin());
      return true;
    }
    fprintf(stderr, "Invalid option %s\n", args[0].c_str());
    return false;
  }
  return true;
}

bool Command::parse_optional_trace_dir(vector<string>& args, string* out) {
  if (!verify_not_option(args)) {
    return false;
  }
  if (args.size() > 0) {
    *out = args[0];
    args.erase(args.begin());
  } else {
    *out = string();
  }
  return true;
}

bool Command::parse_literal(std::vector<std::string>& args, const char* lit) {
  if (args.size() > 0 && args[0] == lit) {
    args.erase(args.begin());
    return true;
  } else {
    return false;
  }
}

} // namespace rr
rr-5.7.0/src/Command.h000066400000000000000000000037401450675474200145060ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_COMMAND_H_
#define RR_COMMAND_H_

#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE 1
#endif

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#include <string>
#include <vector>

namespace rr {

class TraceReader;

enum OptionParameters { NO_PARAMETER, HAS_PARAMETER };

struct OptionSpec {
  char short_name;
  const char* long_name;
  OptionParameters param;
};

struct ParsedOption {
  char short_name;
  std::string arg;
  std::string value;
  int64_t int_value;

  bool verify_valid_int(int64_t min = INT64_MIN + 1,
                        int64_t max = INT64_MAX) const;
};

/**
 * rr command-line commands. Objects of this class must be static, since
 * they are expected to be immortal.
 */
class Command {
public:
  static Command* command_for_name(const std::string& name);
  static void print_help_all(FILE* out);

  /* Runs the command with the given parameters. Returns an exit code.
   */
  virtual int run(std::vector<std::string>& args) = 0;

  void print_help(FILE* out);

  static bool verify_not_option(std::vector<std::string>& args);
  static bool parse_optional_trace_dir(std::vector<std::string>& args,
                                       std::string* out);
  static bool parse_option(std::vector<std::string>& args,
                           const OptionSpec* option_specs, size_t count,
                           ParsedOption* out);
  template <size_t N>
  static bool parse_option(std::vector<std::string>& args,
                           const OptionSpec (&option_specs)[N],
                           ParsedOption* out) {
    return parse_option(args, option_specs, N, out);
  }
  static bool parse_literal(std::vector<std::string>& args, const char* lit);

protected:
  Command(const char* name, const char* help);
  virtual ~Command() {}

  static bool less_than_by_name(Command* c1, Command* c2);

  const char* name;
  const char* help;
};

} // namespace rr

#endif // RR_COMMAND_H_
rr-5.7.0/src/CompressedReader.cc000066400000000000000000000130101450675474200165060ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#define _LARGEFILE64_SOURCE

#include "CompressedReader.h"

#include <brotli/decode.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>

#include "CompressedWriter.h"
#include "core.h"
#include "util.h"

using namespace std;

namespace rr {

CompressedReader::CompressedReader(const string& filename)
    : fd(new ScopedFd(filename.c_str(), O_CLOEXEC | O_RDONLY | O_LARGEFILE)) {
  fd_offset = 0;
  error = !fd->is_open();
  if (error) {
    eof = false;
  } else {
    char ch;
    eof = pread(*fd, &ch, 1, fd_offset) == 0;
  }
  buffer_read_pos = 0;
  buffer_skip_bytes = 0;
  have_saved_state = false;
}

CompressedReader::CompressedReader(const CompressedReader& other) {
  fd = other.fd;
  fd_offset = other.fd_offset;
  error = other.error;
  eof = other.eof;
  buffer_read_pos = other.buffer_read_pos;
  buffer_skip_bytes = other.buffer_skip_bytes;
  buffer = other.buffer;
  have_saved_state = false;
  DEBUG_ASSERT(!other.have_saved_state);
}

CompressedReader::~CompressedReader() { close(); }

static bool read_all(const ScopedFd& fd, size_t size, void* data,
                     uint64_t* offset) {
  ssize_t ret = read_to_end(fd, *offset, data, size);
  if (ret == (ssize_t)size) {
    *offset += size;
    return true;
  }
  return false;
}

static bool do_decompress(std::vector<uint8_t>& compressed,
                          std::vector<uint8_t>& uncompressed) {
  size_t out_size = uncompressed.size();
  return BrotliDecoderDecompress(compressed.size(), compressed.data(),
                                 &out_size, uncompressed.data()) ==
             BROTLI_DECODER_RESULT_SUCCESS &&
         out_size == uncompressed.size();
}

bool CompressedReader::get_buffer(const uint8_t** data, size_t* size) {
  process_skip();
  if (error) {
    return false;
  }
  if (buffer_read_pos >= buffer.size() && !eof) {
    if (!refill_buffer()) {
      return false;
    }
    DEBUG_ASSERT(buffer_read_pos < buffer.size());
  }
  *data = &buffer[buffer_read_pos];
  *size = buffer.size() - buffer_read_pos;
  return true;
}

void CompressedReader::process_skip() {
  while (buffer_skip_bytes > 0 && !error) {
    if (buffer_read_pos < buffer.size()) {
      size_t amount =
          std::min(buffer_skip_bytes, buffer.size() - buffer_read_pos);
      buffer_skip_bytes -= amount;
      buffer_read_pos += amount;
      continue;
    }
    if (!refill_buffer(&buffer_skip_bytes)) {
      return;
    }
  }
}

bool CompressedReader::read(void* data, size_t size) {
  process_skip();
  while (size > 0) {
    if (error) {
      return false;
    }
    if (buffer_read_pos < buffer.size()) {
      size_t amount = std::min(size, buffer.size() - buffer_read_pos);
      memcpy(data, &buffer[buffer_read_pos], amount);
      size -= amount;
      data = static_cast<char*>(data) + amount;
      buffer_read_pos += amount;
      continue;
    }
    if (!refill_buffer()) {
      return false;
    }
  }
  return true;
}

bool CompressedReader::refill_buffer(size_t* skip_bytes) {
  if (have_saved_state && !have_saved_buffer) {
    std::swap(buffer, saved_buffer);
    have_saved_buffer = true;
  }
  while (true) {
    CompressedWriter::BlockHeader header;
    if (!read_all(*fd, sizeof(header), &header, &fd_offset)) {
      error = true;
      return false;
    }

    if (skip_bytes && *skip_bytes >= header.uncompressed_length) {
      fd_offset += header.compressed_length;
      *skip_bytes -= header.uncompressed_length;
      char ch;
      if (pread(*fd, &ch, 1, fd_offset) == 0) {
        eof = true;
        return false;
      }
      continue;
    }

    std::vector<uint8_t> compressed_buf;
    compressed_buf.resize(header.compressed_length);
    if (!read_all(*fd, compressed_buf.size(), &compressed_buf[0],
                  &fd_offset)) {
      error = true;
      return false;
    }

    char ch;
    if (pread(*fd, &ch, 1, fd_offset) == 0) {
      eof = true;
    }

    buffer.resize(header.uncompressed_length);
    buffer_read_pos = 0;
    if (!do_decompress(compressed_buf, buffer)) {
      error = true;
      return false;
    }
    return true;
  }
}

void CompressedReader::rewind() {
  DEBUG_ASSERT(!have_saved_state);
  fd_offset = 0;
  buffer_read_pos = 0;
  buffer_skip_bytes = 0;
  buffer.clear();
  eof = false;
}

void CompressedReader::close() { fd = nullptr; }

void CompressedReader::save_state() {
  DEBUG_ASSERT(!have_saved_state);
  process_skip();
  have_saved_state = true;
  have_saved_buffer = false;
  saved_fd_offset = fd_offset;
  saved_buffer_read_pos = buffer_read_pos;
}

void CompressedReader::restore_state() {
  DEBUG_ASSERT(have_saved_state);
  have_saved_state = false;
  if (saved_fd_offset < fd_offset) {
    eof = false;
  }
  fd_offset = saved_fd_offset;
  if (have_saved_buffer) {
    std::swap(buffer, saved_buffer);
    saved_buffer.clear();
  }
  buffer_read_pos = saved_buffer_read_pos;
  buffer_skip_bytes = 0;
}

void CompressedReader::discard_state() {
  DEBUG_ASSERT(have_saved_state);
  have_saved_state = false;
  if (have_saved_buffer) {
    saved_buffer.clear();
  }
}

uint64_t CompressedReader::uncompressed_bytes() const {
  uint64_t offset = 0;
  uint64_t uncompressed_bytes = 0;
  CompressedWriter::BlockHeader header;
  while (read_all(*fd, sizeof(header), &header, &offset)) {
    uncompressed_bytes += header.uncompressed_length;
    offset += header.compressed_length;
  }
  return uncompressed_bytes;
}

uint64_t CompressedReader::compressed_bytes() const {
  return lseek(*fd, 0, SEEK_END);
}

} // namespace rr
rr-5.7.0/src/CompressedReader.h000066400000000000000000000043711450675474200163600ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_COMPRESSED_READER_H_
#define RR_COMPRESSED_READER_H_

#include <stdint.h>
#include <stdlib.h>

#include <memory>
#include <string>
#include <vector>

#include "ScopedFd.h"

namespace rr {

/**
 * CompressedReader opens an input file written by CompressedWriter
 * and reads data from it. Currently data is decompressed by the thread that
 * calls read().
 */
class CompressedReader {
public:
  CompressedReader(const std::string& filename);
  CompressedReader(const CompressedReader& aOther);
  ~CompressedReader();

  bool good() const { return !error; }
  bool at_end() const {
    const_cast<CompressedReader*>(this)->process_skip();
    return eof && buffer_read_pos == buffer.size();
  }
  // Returns true if successful. Otherwise there's an error and good()
  // will be false.
  bool read(void* data, size_t size);
  // Returns pointer/size of some buffered data. Does not change the state.
  // Returns zero size if at EOF.
  bool get_buffer(const uint8_t** data, size_t* size);
  // Advances the read position by the given size.
  void skip(size_t size) { buffer_skip_bytes += size; }
  void rewind();
  void close();

  /**
   * Save the current position. Nested saves are not allowed.
   */
  void save_state();
  /**
   * Restore previously saved position.
   */
  void restore_state();
  /**
   * Discard saved position
   */
  void discard_state();

  /**
   * Gathers stats on the file stream. These are independent of what's
   * actually been read.
   */
  uint64_t uncompressed_bytes() const;
  uint64_t compressed_bytes() const;

protected:
  void process_skip();
  bool refill_buffer(size_t* skip_bytes = nullptr);

  /* Our fd might be the dup of another fd, so we can't rely on its current
     file position. Instead track the current position in fd_offset and use
     pread. */
  uint64_t fd_offset;
  std::shared_ptr<ScopedFd> fd;
  bool error;
  bool eof;
  std::vector<uint8_t> buffer;
  // within `buffer`
  size_t buffer_read_pos;
  size_t buffer_skip_bytes;

  bool have_saved_state;
  bool have_saved_buffer;
  uint64_t saved_fd_offset;
  std::vector<uint8_t> saved_buffer;
  size_t saved_buffer_read_pos;
};

} // namespace rr

#endif /* RR_COMPRESSED_READER_H_ */
rr-5.7.0/src/CompressedWriter.cc000066400000000000000000000202461450675474200165670ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#define _LARGEFILE64_SOURCE

#include "CompressedWriter.h"

#include <brotli/encode.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>

#include "core.h"
#include "util.h"

using namespace std;

namespace rr {

/* See
 * http://robert.ocallahan.org/2017/07/selecting-compression-algorithm-for-rr.html
 */
static const int BROTLI_LEVEL = 5;

void* CompressedWriter::compression_thread_callback(void* p) {
  static_cast<CompressedWriter*>(p)->compression_thread();
  return nullptr;
}

CompressedWriter::CompressedWriter(const string& filename, size_t block_size,
                                   uint32_t num_threads)
    : fd(filename.c_str(),
         O_CLOEXEC | O_WRONLY | O_CREAT | O_EXCL | O_LARGEFILE, 0400) {
  this->block_size = block_size;
  threads.resize(num_threads);
  thread_pos.resize(num_threads);
  buffer.resize(block_size * (num_threads + 2));
  pthread_mutex_init(&mutex, nullptr);
  pthread_cond_init(&cond, nullptr);
  for (uint32_t i = 0; i < num_threads; ++i) {
    thread_pos[i] = UINT64_MAX;
  }
  next_thread_pos = 0;
  next_thread_end_pos = 0;
  closing = false;
  write_error = false;
  producer_reserved_pos = 0;
  producer_reserved_write_pos = 0;
  producer_reserved_upto_pos = 0;
  error = false;

  if (fd < 0) {
    error = true;
    return;
  }

  // Make sure the compression threads block all signals
  sigset_t set;
  sigset_t old_mask;
  sigfillset(&set);
  sigprocmask(SIG_BLOCK, &set, &old_mask);

  // Hold the lock so threads don't inspect the 'threads' array
  // until we've finished initializing it.
  pthread_mutex_lock(&mutex);
  for (uint32_t i = 0; i < num_threads; ++i) {
    while (true) {
      int err = pthread_create(&threads[i], nullptr,
                               compression_thread_callback, this);
      if (err == EAGAIN) {
        sched_yield(); // Give other processes a chance to exit.
        continue;
      } else if (err != 0) {
        SAFE_FATAL(err, "Failed to create compression threads!");
      }
      break;
    }
    size_t last_slash = filename.rfind('/');
    string thread_name =
        string("compress ") +
        (last_slash == string::npos ? filename
                                    : filename.substr(last_slash + 1));
    pthread_setname_np(threads[i], thread_name.substr(0, 15).c_str());
  }
  pthread_mutex_unlock(&mutex);

  sigprocmask(SIG_SETMASK, &old_mask, nullptr);
}

CompressedWriter::~CompressedWriter() {
  close();
  pthread_mutex_destroy(&mutex);
  pthread_cond_destroy(&cond);
}

void CompressedWriter::write(const void* data, size_t size) {
  while (!error && size > 0) {
    uint64_t reservation_size =
        producer_reserved_upto_pos - producer_reserved_write_pos;
    if (reservation_size == 0) {
      update_reservation(WAIT);
      continue;
    }
    size_t buf_offset = (size_t)(producer_reserved_write_pos % buffer.size());
    size_t amount =
        min(buffer.size() - buf_offset, (size_t)min(reservation_size, size));
    memcpy(&buffer[buf_offset], data, amount);
    producer_reserved_write_pos += amount;
    data = static_cast<const char*>(data) + amount;
    size -= amount;
  }

  if (!error &&
      producer_reserved_write_pos - producer_reserved_pos >=
          buffer.size() / 2) {
    update_reservation(NOWAIT);
  }
}

void CompressedWriter::update_reservation(WaitFlag wait_flag) {
  pthread_mutex_lock(&mutex);

  next_thread_end_pos = producer_reserved_write_pos;
  producer_reserved_pos = producer_reserved_write_pos;

  // Wake up threads that might be waiting to consume data.
  pthread_cond_broadcast(&cond);

  while (!error) {
    if (write_error) {
      error = true;
      break;
    }
    uint64_t completed_pos = next_thread_pos;
    for (uint32_t i = 0; i < thread_pos.size(); ++i) {
      completed_pos = min(completed_pos, thread_pos[i]);
    }
    producer_reserved_upto_pos = completed_pos + buffer.size();
    if (producer_reserved_pos < producer_reserved_upto_pos ||
        wait_flag == NOWAIT) {
      break;
    }
    pthread_cond_wait(&cond, &mutex);
  }

  pthread_mutex_unlock(&mutex);
}

void CompressedWriter::compression_thread() {
  pthread_mutex_lock(&mutex);
  int thread_index;
  pthread_t self = pthread_self();
  for (thread_index = 0; threads[thread_index] != self; ++thread_index) {
  }

  // Add slop for incompressible data
  vector<uint8_t> outputbuf;
  outputbuf.resize((size_t)(block_size * 1.1) + sizeof(BlockHeader));
  BlockHeader* header = reinterpret_cast<BlockHeader*>(&outputbuf[0]);

  while (true) {
    if (!write_error && next_thread_pos < next_thread_end_pos &&
        (closing || next_thread_pos + block_size <= next_thread_end_pos)) {
      thread_pos[thread_index] = next_thread_pos;
      next_thread_pos = min(next_thread_end_pos, next_thread_pos + block_size);
      // header->uncompressed_length must be <= block_size,
      // therefore fits in a size_t.
      header->uncompressed_length =
          (size_t)(next_thread_pos - thread_pos[thread_index]);
      pthread_mutex_unlock(&mutex);
      header->compressed_length =
          do_compress(thread_pos[thread_index], header->uncompressed_length,
                      &outputbuf[sizeof(BlockHeader)],
                      outputbuf.size() - sizeof(BlockHeader));
      pthread_mutex_lock(&mutex);
      if (header->compressed_length == 0) {
        write_error = true;
      }

      // wait until we're the next thread that needs to write
      while (!write_error) {
        bool other_thread_write_first = false;
        for (uint32_t i = 0; i < thread_pos.size(); ++i) {
          if (thread_pos[i] < thread_pos[thread_index]) {
            other_thread_write_first = true;
          }
        }
        if (!other_thread_write_first) {
          break;
        }
        pthread_cond_wait(&cond, &mutex);
      }

      if (!write_error) {
        pthread_mutex_unlock(&mutex);
        write_all(fd, &outputbuf[0],
                  sizeof(BlockHeader) + header->compressed_length);
        pthread_mutex_lock(&mutex);
      }

      thread_pos[thread_index] = UINT64_MAX;
      // do a broadcast because we might need to unblock
      // the producer thread or a compressor thread waiting
      // for us to write.
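      // (A single pthread_cond_signal wouldn't do here: we can't tell which
      // waiter needs to make progress next, so wake them all and let each
      // re-check its own condition.)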
      pthread_cond_broadcast(&cond);
      continue;
    }

    if (closing && (write_error || next_thread_pos == next_thread_end_pos)) {
      break;
    }

    pthread_cond_wait(&cond, &mutex);
  }
  pthread_mutex_unlock(&mutex);
}

void CompressedWriter::close(Sync sync) {
  if (!fd.is_open()) {
    return;
  }

  update_reservation(NOWAIT);

  pthread_mutex_lock(&mutex);
  closing = true;
  pthread_cond_broadcast(&cond);
  pthread_mutex_unlock(&mutex);

  for (auto i = threads.begin(); i != threads.end(); ++i) {
    pthread_join(*i, nullptr);
  }

  if (sync == SYNC) {
    if (fsync(fd) < 0) {
      error = true;
    }
  }

  if (write_error) {
    error = true;
  }
  fd.close();
}

size_t CompressedWriter::do_compress(uint64_t offset, size_t length,
                                     uint8_t* outputbuf,
                                     size_t outputbuf_len) {
  BrotliEncoderState* state = BrotliEncoderCreateInstance(NULL, NULL, NULL);
  if (!state) {
    DEBUG_ASSERT(0 && "BrotliEncoderCreateInstance failed");
  }
  if (!BrotliEncoderSetParameter(state, BROTLI_PARAM_QUALITY, BROTLI_LEVEL)) {
    DEBUG_ASSERT(0 && "Brotli initialization failed");
  }
  size_t ret = 0;
  while (length > 0) {
    size_t buf_offset = (size_t)(offset % buffer.size());
    size_t amount = min(length, buffer.size() - buf_offset);
    const uint8_t* in = &buffer[buf_offset];
    if (!BrotliEncoderCompressStream(state, BROTLI_OPERATION_PROCESS, &amount,
                                     &in, &outputbuf_len, &outputbuf, &ret)) {
      DEBUG_ASSERT(0 && "Brotli compression failed");
    }
    size_t consumed = in - &buffer[buf_offset];
    offset += consumed;
    length -= consumed;
  }
  size_t zero = 0;
  if (!BrotliEncoderCompressStream(state, BROTLI_OPERATION_FINISH, &zero, NULL,
                                   &outputbuf_len, &outputbuf, &ret)) {
    DEBUG_ASSERT(0 && "Brotli compression failed");
  }
  BrotliEncoderDestroyInstance(state);
  return ret;
}

} // namespace rr
rr-5.7.0/src/CompressedWriter.h000066400000000000000000000053511450675474200164310ustar00rootroot00000000000000
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_COMPRESSED_WRITER_H_
#define RR_COMPRESSED_WRITER_H_

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

#include <string>
#include <vector>

#include "ScopedFd.h"

namespace rr {

/**
 * CompressedWriter opens an output file and writes compressed blocks to it.
 * Blocks of a fixed but unspecified size (currently 1MB) are compressed.
 * Each block of compressed data is written to the file preceded by two
 * 32-bit words: the size of the compressed data (excluding block header)
 * and the size of the uncompressed data, in that order. See BlockHeader below.
 *
 * We use multiple threads to perform compression. The threads are
 * responsible for the actual data writes. The thread that creates the
 * CompressedWriter is the "producer" thread and must also be the caller of
 * 'write'. The producer thread may block in 'write' if 'buffer_size' bytes are
 * being compressed.
 *
 * Each data block is compressed independently using brotli.
 */
class CompressedWriter {
public:
  CompressedWriter(const std::string& filename, size_t buffer_size,
                   uint32_t num_threads);
  ~CompressedWriter();

  // Call only on producer thread
  bool good() const { return !error; }

  // Call only on producer thread.
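  // A minimal usage sketch (file name and sizes are hypothetical):
  //   CompressedWriter w("data", 1024 * 1024, 4); // 1MB blocks, 4 threads
  //   w.write(buf, len);  // may block while worker threads catch up
  //   w.close(SYNC);      // joins the workers, then fsyncs the output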
void write(const void* data, size_t size); enum Sync { DONT_SYNC, SYNC }; // Call only on producer thread void close(Sync sync = DONT_SYNC); struct BlockHeader { uint32_t compressed_length; uint32_t uncompressed_length; }; protected: enum WaitFlag { WAIT, NOWAIT }; void update_reservation(WaitFlag wait_flag); static void* compression_thread_callback(void* p); void compression_thread(); size_t do_compress(uint64_t offset, size_t length, uint8_t* outputbuf, size_t outputbuf_len); // Immutable while threads are running ScopedFd fd; int block_size; pthread_mutex_t mutex; pthread_cond_t cond; std::vector threads; // Carefully shared... std::vector buffer; // BEGIN protected by 'mutex' /* position in output stream that this thread is currently working on, * or UINT64_MAX if it's idle */ std::vector thread_pos; /* position in output stream of data to dispatch to next thread */ uint64_t next_thread_pos; /* position in output stream of end of data ready to dispatch */ uint64_t next_thread_end_pos; bool closing; bool write_error; // END protected by 'mutex' /* producer thread only */ /* Areas in the buffer that have been reserved for write() */ uint64_t producer_reserved_pos; uint64_t producer_reserved_write_pos; uint64_t producer_reserved_upto_pos; bool error; }; } // namespace rr #endif /* RR_COMPRESSED_WRITER_H_ */ rr-5.7.0/src/DiversionSession.cc000066400000000000000000000214771450675474200166030ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "DiversionSession.h" #include #include "AutoRemoteSyscalls.h" #include "ReplaySession.h" #include "core.h" #include "kernel_metadata.h" #include "log.h" using namespace std; namespace rr { DiversionSession::DiversionSession(int cpu_binding) : emu_fs(EmuFs::create()), fake_rdstc(uint64_t(1) << 60), cpu_binding_(cpu_binding) {} DiversionSession::~DiversionSession() { // We won't permanently leak any OS resources by not ensuring // we've cleaned up here, but sessions can be created and // destroyed many times, and we don't want to temporarily hog // resources. kill_all_tasks(); DEBUG_ASSERT(tasks().size() == 0 && vms().size() == 0); DEBUG_ASSERT(emu_fs->size() == 0); } static void finish_emulated_syscall_with_ret(Task* t, long ret) { t->finish_emulated_syscall(); Registers r = t->regs(); r.set_syscall_result(ret); t->set_regs(r); } /** * Execute the syscall contained in |t|'s current register set. The * return value of the syscall is set for |t|'s registers, to be * returned to the tracee task. */ static void execute_syscall(Task* t) { t->finish_emulated_syscall(); AutoRemoteSyscalls remote(t); remote.syscall(remote.regs().original_syscallno(), remote.regs().arg1(), remote.regs().arg2(), remote.regs().arg3(), remote.regs().arg4(), remote.regs().arg5(), remote.regs().arg6()); remote.regs().set_syscall_result(t->regs().syscall_result()); } uint64_t DiversionSession::next_rdtsc_value() { uint64_t rdtsc_value = fake_rdstc; fake_rdstc += 1 << 20; // 1M cycles return rdtsc_value; } template static void process_syscall_arch(Task* t, int syscallno) { LOG(debug) << "Processing " << syscall_name(syscallno, Arch::arch()); if (syscallno == Arch::ioctl && t->is_desched_event_syscall()) { // The arm/disarm-desched ioctls are emulated as no-ops. // However, because the rr preload library expects these // syscalls to succeed and aborts if they don't, we fudge a // "0" return value. 
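    // (Illustration, ours: "fudging" the result is the generic faked-syscall
    // pattern -- finish the emulated syscall, then overwrite the result
    // register:
    //
    //   t->finish_emulated_syscall();
    //   Registers r = t->regs();
    //   r.set_syscall_result(0);
    //   t->set_regs(r);
    //
    // which is exactly what the helper invoked below does.)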
finish_emulated_syscall_with_ret(t, 0); return; } if (syscallno == t->session().syscall_number_for_rrcall_rdtsc()) { uint64_t rdtsc_value = static_cast(&t->session())->next_rdtsc_value(); LOG(debug) << "Faking rrcall_rdtsc syscall with value " << rdtsc_value; remote_ptr out_param(t->regs().arg1()); t->write_mem(out_param, rdtsc_value); finish_emulated_syscall_with_ret(t, 0); return; } switch (syscallno) { // We blacklist these syscalls because the params include // namespaced identifiers that are different in replay than // recording, and during replay they may refer to different, // live resources. For example, if a recorded tracees kills // one of its threads, then during replay that killed pid // might refer to a live process outside the tracee tree. We // don't want diversion tracees randomly shooting down other // processes! // // We optimistically assume that filesystem operations were // intended by the user. // // There's a potential problem with "fd confusion": in the // diversion tasks, fds returned from open() during replay are // emulated. But those fds may accidentally refer to live fds // in the task fd table. So write()s etc may not be writing // to the file the tracee expects. However, the only real fds // that leak into tracees are the stdio fds, and there's not // much harm that can be caused by accidental writes to them. case Arch::ipc: case Arch::kill: case Arch::rt_sigqueueinfo: case Arch::rt_tgsigqueueinfo: case Arch::tgkill: case Arch::tkill: // fork/vfork/clone are likely to lead to disaster because we only // ever allow a single task to run. case Arch::fork: case Arch::vfork: case Arch::clone: { LOG(debug) << "Suppressing syscall " << syscall_name(syscallno, t->arch()); Registers r = t->regs(); r.set_syscall_result(-ENOSYS); t->set_regs(r); return; } case Arch::prctl: { Registers r = t->regs(); int op = r.arg1(); if (op == PR_SET_TSC) { LOG(debug) << "Suppressing syscall " << syscall_name(syscallno, t->arch()); r.set_syscall_result(-ENOSYS); t->set_regs(r); return; } break; } case Arch::gettid: { auto tid = t->own_namespace_tid(); LOG(debug) << "Emulating gettid with " << tid; Registers r = t->regs(); r.set_syscall_result(tid); t->set_regs(r); return; } case Arch::getpid: { auto pid = t->thread_group()->tgid_own_namespace; LOG(debug) << "Emulating getpid with " << pid; Registers r = t->regs(); r.set_syscall_result(pid); t->set_regs(r); return; } } LOG(debug) << "Executing syscall " << syscall_name(syscallno, t->arch()); execute_syscall(t); } static void process_syscall(Task* t, int syscallno){ RR_ARCH_FUNCTION(process_syscall_arch, t->arch(), t, syscallno) } static bool maybe_handle_task_exit(Task* t, TaskContext* context, DiversionSession::DiversionResult* result) { if (t->ptrace_event() != PTRACE_EVENT_EXIT && !t->was_reaped()) { return false; } t->did_kill(); t->detach(); delete t; // This is now a dangling pointer, so clear it. context->task = nullptr; result->status = DiversionSession::DIVERSION_EXITED; result->break_status.task_context = *context; result->break_status.task_exit = true; return true; } /** * Advance execution until either a signal is received (including a SIGTRAP * generated by a single-step) or a syscall is made. */ DiversionSession::DiversionResult DiversionSession::diversion_step( Task* t, RunCommand command, int signal_to_deliver) { DEBUG_ASSERT(command != RUN_SINGLESTEP_FAST_FORWARD); assert_fully_initialized(); DiversionResult result; TaskContext context(t); // An exit might have occurred while processing a previous syscall. 
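  // (Illustration, ours: given a DiversionSession `session`,
  // next_rdtsc_value() above hands out a counter that starts at 1 << 60 and
  // advances by 1 << 20 ("1M cycles") per read, so diverted TSC reads are
  // strictly monotonic and far beyond anything the recording produced:
  //
  //   uint64_t t0 = session.next_rdtsc_value(); // 1 << 60
  //   uint64_t t1 = session.next_rdtsc_value(); // (1 << 60) + (1 << 20)
  //   assert(t1 > t0);
  //
  // The check below handles the leftover-exit case described above.)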
if (maybe_handle_task_exit(t, &context, &result)) { return result; } t->set_in_diversion(true); while (true) { switch (command) { case RUN_CONTINUE: { LOG(debug) << "Continuing to next syscall"; bool ok = t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, RESUME_UNLIMITED_TICKS, signal_to_deliver); ASSERT(t, ok) << "Tracee was killed unexpectedly"; break; } case RUN_SINGLESTEP: { LOG(debug) << "Stepping to next insn/syscall"; bool ok = t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT, RESUME_UNLIMITED_TICKS, signal_to_deliver); ASSERT(t, ok) << "Tracee was killed unexpectedly"; break; } default: FATAL() << "Illegal run command " << command; } if (maybe_handle_task_exit(t, &context, &result)) { return result; } result.status = DIVERSION_CONTINUE; if (t->stop_sig()) { LOG(debug) << "Pending signal: " << t->get_siginfo(); result.break_status = diagnose_debugger_trap(t, command); if (t->stop_sig() == SIGTRAP && !result.break_status.breakpoint_hit && result.break_status.watchpoints_hit.empty() && !result.break_status.singlestep_complete) { result.break_status.signal = unique_ptr(new siginfo_t(t->get_siginfo())); result.break_status.signal->si_signo = t->stop_sig(); } else if (t->stop_sig() == SIGSEGV) { auto trapped_instruction = trapped_instruction_at(t, t->ip()); if (trapped_instruction == TrappedInstruction::RDTSC) { size_t len = trapped_instruction_len(trapped_instruction); uint64_t rdtsc_value = next_rdtsc_value(); LOG(debug) << "Faking RDTSC instruction with value " << rdtsc_value; Registers r = t->regs(); r.set_ip(r.ip() + len); r.set_ax((uint32_t)rdtsc_value); r.set_dx(rdtsc_value >> 32); t->set_regs(r); result.break_status = BreakStatus(); continue; } } LOG(debug) << "Diversion break at ip=" << (void*)t->ip().register_value() << "; break=" << result.break_status.breakpoint_hit << ", watch=" << !result.break_status.watchpoints_hit.empty() << ", singlestep=" << result.break_status.singlestep_complete; ASSERT(t, !result.break_status.singlestep_complete || command == RUN_SINGLESTEP); return result; } break; } if (t->status().is_syscall()) { t->apply_syscall_entry_regs(); } process_syscall(t, t->regs().original_syscallno()); check_for_watchpoint_changes(t, result.break_status); return result; } } // namespace rr rr-5.7.0/src/DiversionSession.h000066400000000000000000000043751450675474200164430ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_DIVERSION_SESSION_H_ #define RR_DIVERSION_SESSION_H_ #include "EmuFs.h" #include "Session.h" namespace rr { class ReplaySession; /** * A DiversionSession lets you run task(s) forward without replay. * Clone a ReplaySession to a DiversionSession to execute some arbitrary * code for its side effects. * * Diversion allows tracees to execute freely, as in "recorder" * mode, but doesn't attempt to record any data. Diverter * emulates the syscalls it's able to (such as writes to stdio fds), * and essentially ignores the syscalls it doesn't know how to * implement. Tracees can easily get into inconsistent states within * diversion mode, and no attempt is made to detect or rectify that. * * Diverter mode is designed to support short-lived diversions from * "replayer" sessions, as required to support gdb's |call foo()| * feature. A diversion is created for the call frame, then discarded * when the call finishes (loosely speaking). 
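 *
 * A sketch of typical use (ours; the real gdb-server plumbing lives
 * elsewhere in rr and reaches this class via ReplaySession's
 * clone_diversion() factory):
 *
 *   DiversionSession::shr_ptr diversion = replay->clone_diversion();
 *   auto result = diversion->diversion_step(t, RUN_CONTINUE);
 *   if (result.status == DiversionSession::DIVERSION_EXITED) {
 *     // All tracees are gone; don't step again.
 *   }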
*/ class DiversionSession final : public Session { public: DiversionSession(int cpu_binding); typedef std::shared_ptr shr_ptr; ~DiversionSession(); EmuFs& emufs() const { return *emu_fs; } enum DiversionStatus { // Some execution was done. diversion_step() can be called again. DIVERSION_CONTINUE, // All tracees are dead. diversion_step() should not be called again. DIVERSION_EXITED }; struct DiversionResult { DiversionStatus status; BreakStatus break_status; }; /** * Try make progress in this diversion session. Run task t if possible. */ DiversionResult diversion_step(Task* t, RunCommand command = RUN_CONTINUE, int signal_to_deliver = 0); virtual DiversionSession* as_diversion() override { return this; } virtual int cpu_binding() const override { return cpu_binding_; } void set_tracee_fd_number(int fd_number) { tracee_socket_fd_number = fd_number; } void on_create(Task *t) override { this->Session::on_create(t); } uint64_t next_rdtsc_value(); private: friend class ReplaySession; std::shared_ptr emu_fs; uint64_t fake_rdstc; int cpu_binding_; }; } // namespace rr #endif // RR_DIVERSION_SESSION_H_ rr-5.7.0/src/DumpCommand.cc000066400000000000000000000332121450675474200154670ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "DumpCommand.h" #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AddressSpace.h" #include "Command.h" #include "TraceStream.h" #include "core.h" #include "kernel_metadata.h" #include "log.h" #include "main.h" #include "util.h" using namespace std; namespace rr { class DumpCommand : public Command { public: virtual int run(vector& args) override; protected: DumpCommand(const char* name, const char* help) : Command(name, help) {} static DumpCommand singleton; }; DumpCommand DumpCommand::singleton( "dump", " rr dump [OPTIONS] [] [...]\n" " Event specs can be either an event number like `127', or a range\n" " like `1000-5000', or `end' for the last record in the trace.\n" " By default, all events are dumped.\n" " -b, --syscallbuf dump syscallbuf contents\n" " -e, --task-events dump task events\n" " -m, --recorded-metadata dump recorded data metadata\n" " -p, --mmaps dump mmap data\n" " -r, --raw dump trace frames in a more easily\n" " machine-parseable format instead of the\n" " default human-readable format\n" " -s, --statistics dump statistics about the trace\n" " -t, --tid= dump events only for the specified tid\n"); static bool parse_dump_arg(vector& args, DumpFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 0, "socket-addresses", NO_PARAMETER }, { 'b', "syscallbuf", NO_PARAMETER }, { 'e', "task-events", NO_PARAMETER }, { 'm', "recorded-metadata", NO_PARAMETER }, { 'p', "mmaps", NO_PARAMETER }, { 'r', "raw", NO_PARAMETER }, { 's', "statistics", NO_PARAMETER }, { 't', "tid", HAS_PARAMETER }, }; ParsedOption opt; if (!Command::parse_option(args, options, &opt)) { return false; } switch (opt.short_name) { case 'b': flags.dump_syscallbuf = true; break; case 'e': flags.dump_task_events = true; break; case 'm': flags.dump_recorded_data_metadata = true; break; case 'p': flags.dump_mmaps = true; break; case 'r': flags.raw_dump = true; break; case 's': flags.dump_statistics = true; break; case 't': if (!opt.verify_valid_int(1, INT32_MAX)) { return false; } flags.only_tid = opt.int_value; break; case 0: flags.dump_socket_addrs = true; break; default: DEBUG_ASSERT(0 && "Unknown option"); } return true; } 
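// Example (illustrative): `rr dump -b -t 1234 <trace-dir> 1000-2000` leaves
// flags.dump_syscallbuf == true and flags.only_tid == 1234 after the
// parse_dump_arg() loop above. The programmatic equivalent, using the dump()
// entry point declared in DumpCommand.h, is:
//
//   DumpFlags flags;
//   flags.dump_syscallbuf = true;
//   flags.only_tid = 1234;
//   dump(trace_dir, flags, { "1000-2000" }, stdout);
//
// where "1000-2000" selects an inclusive range of event times (see
// dump_events_matching below).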
static void dump_syscallbuf_data(TraceReader& trace, FILE* out, const TraceFrame& frame, const DumpFlags& flags) { if (frame.event().type() != EV_SYSCALLBUF_FLUSH) { return; } auto buf = trace.read_raw_data(); size_t bytes_remaining = buf.data.size() - sizeof(struct syscallbuf_hdr); auto flush_hdr = reinterpret_cast(buf.data.data()); if (flush_hdr->num_rec_bytes > bytes_remaining) { fprintf(stderr, "Malformed trace file (bad recorded-bytes count)\n"); notifying_abort(); } if (flags.raw_dump) { fprintf(out, " "); for (unsigned long i = 0; i < sizeof(syscallbuf_hdr); ++i) { fprintf(out, "%2.2x", *(buf.data.data() + (uintptr_t)i)); } fprintf(out, "\n"); } bytes_remaining = flush_hdr->num_rec_bytes; auto record_ptr = reinterpret_cast(flush_hdr + 1); auto end_ptr = record_ptr + bytes_remaining; while (record_ptr < end_ptr) { auto record = reinterpret_cast(record_ptr); // Buffered syscalls always use the task arch fprintf(out, " { syscall:'%s', ret:0x%lx, size:0x%lx%s%s }\n", syscall_name(record->syscallno, frame.regs().arch()).c_str(), (long)record->ret, (long)record->size, record->desched ? ", desched:1" : "", record->replay_assist ? ", replay_assist:1" : ""); if (flags.raw_dump) { fprintf(out, " "); for (unsigned long i = 0; i < record->size; ++i) { fprintf(out, "%2.2x", *(record_ptr + (uintptr_t)i)); } fprintf(out, "\n"); } if (record->size < sizeof(*record)) { fprintf(stderr, "Malformed trace file (bad record size)\n"); notifying_abort(); } record_ptr += stored_record_size(record->size); } if (flags.dump_mmaps) { for (auto& record : frame.event().SyscallbufFlush().mprotect_records) { char prot_flags[] = "rwx"; if (!(record.prot & PROT_READ)) { prot_flags[0] = '-'; } if (!(record.prot & PROT_WRITE)) { prot_flags[1] = '-'; } if (!(record.prot & PROT_EXEC)) { prot_flags[2] = '-'; } fprintf(out, " { start:'%p', size:'%" PRIx64 "', prot:%s }\n", (void*)record.start, record.size, prot_flags); } } } static void print_socket_addr(FILE* out, const struct NativeArch::sockaddr_storage& sa) { char buf[256]; auto sockaddr = reinterpret_cast(&sa); switch (sockaddr->ss_family) { case AF_INET: { auto sockaddr_in = reinterpret_cast(sockaddr); if (inet_ntop(AF_INET, &sockaddr_in->sin_addr, buf, sizeof(buf) - 1)) { fprintf(out, "%s:%d", buf, sockaddr_in->sin_port); } else { FATAL(); } break; } case AF_INET6: { auto sockaddr_in6 = reinterpret_cast(sockaddr); if (inet_ntop(AF_INET6, &sockaddr_in6->sin6_addr, buf, sizeof(buf) - 1)) { fprintf(out, "%s:%d", buf, sockaddr_in6->sin6_port); } else { FATAL(); } break; } default: fputs("", out); break; } } static void dump_socket_addrs(FILE* out, const TraceFrame& frame) { if (frame.event().type() != EV_SYSCALL) { return; } auto syscall = frame.event().Syscall(); if (syscall.socket_addrs) { fputs(" Local socket address '", out); print_socket_addr(out, (*syscall.socket_addrs.get())[0]); fputs("' Remote socket address '", out); print_socket_addr(out, (*syscall.socket_addrs.get())[1]); fputs("'\n", out); } } static void dump_task_event(FILE* out, const TraceTaskEvent& event) { switch (event.type()) { case TraceTaskEvent::CLONE: fprintf(out, " TraceTaskEvent::CLONE tid=%d parent=%d clone_flags=0x%x\n", event.tid(), event.parent_tid(), event.clone_flags()); break; case TraceTaskEvent::EXEC: fprintf(out, " TraceTaskEvent::EXEC tid=%d file=%s\n", event.tid(), event.file_name().c_str()); break; case TraceTaskEvent::EXIT: fprintf(out, " TraceTaskEvent::EXIT tid=%d status=%d\n", event.tid(), event.exit_status().get()); break; case TraceTaskEvent::DETACH: fprintf(out, " 
TraceTaskEvent::DETACH tid=%d\n", event.tid()); break; default: FATAL() << "Unknown TraceTaskEvent"; break; } } /** * Dump all events from the current to trace that match |spec| to * |out|. |spec| has the following syntax: /\d+(-\d+)?/, expressing * either a single event number of a range, and may be null to * indicate "dump all events". * * This function is side-effect-y, in that the trace file isn't * rewound in between matching each spec. Therefore specs should be * constructed so as to match properly on a serial linear scan; that * is, they should comprise disjoint and monotonically increasing * event sets. No attempt is made to enforce this or normalize specs. */ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, FILE* out, const string* spec, const unordered_map& task_events) { uint32_t start = 0, end = numeric_limits::max(); bool only_end = false; if (spec && *spec == "end") { only_end = true; } else { // Try to parse the "range" syntax '[start]-[end]'. if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { // Fall back on assuming the spec is a single event // number, however it parses out with atoi(). start = end = atoi(spec->c_str()); } } bool process_raw_data = flags.dump_syscallbuf || flags.dump_recorded_data_metadata; while (!trace.at_end()) { auto frame = trace.read_frame(start); if (end < frame.time()) { return; } if (only_end ? trace.at_end() : (start <= frame.time() && frame.time() <= end && (!flags.only_tid || flags.only_tid == frame.tid()))) { if (flags.raw_dump) { frame.dump_raw(out); } else { frame.dump(out); } if (flags.dump_syscallbuf) { dump_syscallbuf_data(trace, out, frame, flags); } if (flags.dump_task_events) { auto it = task_events.find(frame.time()); if (it != task_events.end()) { dump_task_event(out, it->second); } } while (true) { TraceReader::MappedData data; bool found; KernelMapping km = trace.read_mapped_region(&data, &found, TraceReader::DONT_VALIDATE); if (!found) { break; } if (flags.dump_mmaps) { char prot_flags[] = "rwxp"; if (!(km.prot() & PROT_READ)) { prot_flags[0] = '-'; } if (!(km.prot() & PROT_WRITE)) { prot_flags[1] = '-'; } if (!(km.prot() & PROT_EXEC)) { prot_flags[2] = '-'; } if (km.flags() & MAP_SHARED) { prot_flags[3] = 's'; } const char* fsname = km.fsname().c_str(); if (data.source == TraceReader::SOURCE_ZERO) { static const char source_zero[] = ""; fsname = source_zero; } fprintf(out, " { map_file:\"%s\", addr:%p, length:%p, " "prot_flags:\"%s\", file_offset:0x%llx, " "device:%lld, inode:%lld, " "data_file:\"%s\", data_offset:0x%llx, " "file_size:0x%llx }\n", fsname, (void*)km.start().as_int(), (void*)km.size(), prot_flags, (long long)km.file_offset_bytes(), (long long)km.device(), (long long)km.inode(), data.file_name.c_str(), (long long)data.data_offset_bytes, (long long)data.file_size_bytes); } } TraceReader::RawDataMetadata data; while (process_raw_data && trace.read_raw_data_metadata_for_frame(data)) { if (flags.dump_recorded_data_metadata) { fprintf(out, " { tid:%d, addr:%p, length:%p", data.rec_tid, (void*)data.addr.as_int(), (void*)data.size); if (!data.holes.empty()) { fputs(", holes:[", out); bool first = true; for (auto& h : data.holes) { if (!first) { fputs(", ", out); } fprintf(out, "%p-%p", (void*)h.offset, (void*)(h.offset + h.size)); } fputs("]", out); } fputs(" }\n", out); } } if (flags.dump_socket_addrs) { dump_socket_addrs(out, frame); } if (!flags.raw_dump) { fprintf(out, "}\n"); } } else { while (true) { TraceReader::MappedData data; KernelMapping km = 
trace.read_mapped_region(&data, nullptr, TraceReader::DONT_VALIDATE); if (km.size() == 0) { break; } } TraceReader::RawDataMetadata data; while (process_raw_data && trace.read_raw_data_metadata_for_frame(data)) { } } } } static void dump_statistics(const TraceReader& trace, FILE* out) { uint64_t uncompressed = trace.uncompressed_bytes(); uint64_t compressed = trace.compressed_bytes(); fprintf(out, "// Uncompressed bytes %" PRIu64 ", compressed bytes %" PRIu64 ", ratio %.2fx\n", uncompressed, compressed, double(uncompressed) / compressed); } void dump(const string& trace_dir, const DumpFlags& flags, const vector& specs, FILE* out) { TraceReader trace(trace_dir); if (flags.raw_dump) { fprintf(out, "global_time tid reason ticks " "hw_interrupts page_faults instructions " "eax ebx ecx edx esi edi ebp orig_eax esp eip eflags\n"); } unordered_map task_events; FrameTime last_time = 0; while (true) { FrameTime time; TraceTaskEvent r = trace.read_task_event(&time); if (time < last_time) { FATAL() << "TraceTaskEvent times non-monotonic"; } if (r.type() == TraceTaskEvent::NONE) { break; } task_events.insert(make_pair(time, r)); last_time = time; } if (specs.size() > 0) { for (size_t i = 0; i < specs.size(); ++i) { dump_events_matching(trace, flags, out, &specs[i], task_events); } } else { // No specs => dump all events. dump_events_matching(trace, flags, out, nullptr /*all events*/, task_events); } if (flags.dump_statistics) { dump_statistics(trace, out); } } int DumpCommand::run(vector& args) { DumpFlags flags; while (parse_dump_arg(args, flags)) { } string trace_dir; if (!parse_optional_trace_dir(args, &trace_dir)) { print_help(stderr); return 1; } dump(trace_dir, flags, args, stdout); return 0; } } // namespace rr rr-5.7.0/src/DumpCommand.h000066400000000000000000000016471450675474200153400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_DUMP_COMMAND_H_ #define RR_DUMP_COMMAND_H_ #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE 1 #endif #include #include #include #include namespace rr { struct DumpFlags { bool dump_syscallbuf; bool dump_recorded_data_metadata; bool dump_mmaps; bool dump_task_events; bool raw_dump; bool dump_statistics; bool dump_socket_addrs; int only_tid; DumpFlags() : dump_syscallbuf(false), dump_recorded_data_metadata(false), dump_mmaps(false), dump_task_events(false), raw_dump(false), dump_statistics(false), dump_socket_addrs(false), only_tid(0) {} }; void dump(const std::string& trace_dir, const DumpFlags& flags, const std::vector& specs, FILE* out); } // namespace rr #endif // RR_DUMP_COMMAND_H_ rr-5.7.0/src/Dwarf.cc000066400000000000000000000537251450675474200143410ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Dwarf.h" #include #include "log.h" using namespace std; namespace rr { struct Dwarf32 { typedef uint32_t Offset; static const uint8_t EntrySize = 4; struct CompilationUnitPreamble { uint32_t unit_length; }; }; struct Dwarf64 { typedef uint64_t Offset; static const uint8_t EntrySize = 8; struct __attribute__((packed)) CompilationUnitPreamble { uint32_t magic; /* 0xffffffff */ uint64_t unit_length; }; }; template struct __attribute__((packed)) Dwarf4CompilationUnitHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; typename D::Offset debug_abbrev_offset; uint8_t address_size; void install_dwo_id(DwarfCompilationUnit* unit) const { unit->set_dwo_id(0); } }; template struct 
__attribute__((packed)) Dwarf5CompilationUnitHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; uint8_t unit_type; uint8_t address_size; typename D::Offset debug_abbrev_offset; void install_dwo_id(DwarfCompilationUnit* unit) const { unit->set_dwo_id(0); } }; template struct __attribute__((packed)) Dwarf5SkeletonSplitCompilationUnitHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; uint8_t unit_type; uint8_t address_size; typename D::Offset debug_abbrev_offset; uint64_t dwo_id; void install_dwo_id(DwarfCompilationUnit* unit) const { unit->set_dwo_id(dwo_id); } }; template struct __attribute__((packed)) Dwarf2LineNumberTableHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; typename D::Offset header_length; uint8_t minimum_instruction_length; uint8_t default_is_stmt; int8_t line_base; uint8_t line_range; uint8_t opcode_base; bool read_directories(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, std::vector& directories, std::vector& files) const; }; template struct __attribute__((packed)) Dwarf4LineNumberTableHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; typename D::Offset header_length; uint8_t minimum_instruction_length; uint8_t maximum_operations_per_instruction; uint8_t default_is_stmt; int8_t line_base; uint8_t line_range; uint8_t opcode_base; bool read_directories(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, std::vector& directories, std::vector& files) const; }; template struct __attribute__((packed)) Dwarf5LineNumberTableHeader { typedef D Size; typename D::CompilationUnitPreamble preamble; uint16_t version; uint8_t address_size; uint8_t segment_selector_size; typename D::Offset header_length; uint8_t minimum_instruction_length; uint8_t maximum_operations_per_instruction; uint8_t default_is_stmt; int8_t line_base; uint8_t line_range; uint8_t opcode_base; bool read_directories(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, std::vector& directories, std::vector& files) const; }; uint64_t DwarfSpan::read_uleb(bool* ok) { uint64_t ret = 0; int shift = 0; while (start < end) { uint8_t b = *start; ++start; ret |= (b & 0x7f) << shift; if (!(b & 0x80)) { return ret; } shift += 7; if (shift >= 64) { *ok = false; return 0; } } *ok = false; return 0; } DwarfSpan DwarfSpan::read_leb_ref(bool* ok) { DwarfSpan ret(*this); while (start < end) { if (!(*start & 0x80)) { ++start; ret.end = start; return ret; } ++start; } *ok = false; return ret; } const char* DwarfSpan::read_null_terminated_string(bool* ok) { const void* p = memchr(start, 0, size()); if (!p) { LOG(warn) << "String was not null-terminated"; *ok = false; return nullptr; } const char* ret = reinterpret_cast(start); start = static_cast(p) + 1; return ret; } DwarfAbbrev* DwarfAbbrevSet::lookup(uint64_t code) { auto it = abbrevs.find(code); if (it != abbrevs.end()) { return it->second.get(); } while (!remaining_span.empty()) { bool ok = true; uint64_t abbrev_code = remaining_span.read_uleb(&ok); unique_ptr abbrev(new DwarfAbbrev); abbrev->tag = (DWTag)remaining_span.read_uleb(&ok); abbrev->children = (DWChildren)remaining_span.read_value(&ok); auto abbrev_raw = abbrev.get(); while (true) { uint64_t name = remaining_span.read_uleb(&ok); DWForm form = (DWForm)remaining_span.read_uleb(&ok); if (!name && !form) { break; } DwarfSpan constant; if (form == DW_FORM_implicit_const) { constant = 
remaining_span.read_leb_ref(&ok); } abbrev->attributes.push_back({ name, form, constant }); } if (!ok) { LOG(warn) << "Invalid DWARF abbrev table!"; return nullptr; } abbrevs.insert(make_pair(abbrev_code, std::move(abbrev))); if (code == abbrev_code) { return abbrev_raw; } } return nullptr; } DwarfAbbrevSet& DwarfAbbrevs::lookup(uint64_t offset) { auto it = abbrevs.find(offset); if (it != abbrevs.end()) { return *it->second; } unique_ptr set(new DwarfAbbrevSet(debug_abbrev.subspan(offset))); auto set_raw = set.get(); abbrevs.insert(make_pair(offset, std::move(set))); return *set_raw; } static DwarfAbbrev null_abbrev; DwarfDIE::DwarfDIE(DwarfSpan span, DwarfAbbrevSet& abbrevs, uint8_t dwarf_size, uint8_t address_size, bool* ok) : address_size(address_size), dwarf_size(dwarf_size) { uint64_t code = span.read_uleb(ok); if (!*ok) { return; } if (code == 0) { abbrev = &null_abbrev; return; } abbrev = abbrevs.lookup(code); if (!abbrev) { LOG(warn) << "No abbrev found for DIE"; *ok = false; return; } attr_span = span; } static size_t form_size(DWForm form, size_t address_size, size_t dwarf_size, DwarfSpan* span, bool* ok) { if (form == DW_FORM_indirect) { form = (DWForm)span->read_uleb(ok); if (!*ok) { return 0; } } if (form == DW_FORM_udata) { auto before = span->size(); DwarfSpan a_span(*span); a_span.read_uleb(ok); if (!*ok) { return 0; } return before - a_span.size(); } switch (form) { case DW_FORM_addr: return address_size; case DW_FORM_addrx: return dwarf_size; case DW_FORM_data1: return 1; case DW_FORM_data2: return 2; case DW_FORM_data4: return 4; case DW_FORM_data8: return 8; case DW_FORM_data16: return 16; case DW_FORM_flag: return 1; case DW_FORM_strp: return dwarf_size; case DW_FORM_line_strp: return dwarf_size; case DW_FORM_strx: return dwarf_size; case DW_FORM_strx1: return 1; case DW_FORM_strx2: return 2; case DW_FORM_strx3: return 3; case DW_FORM_strx4: return 4; case DW_FORM_string: { auto before = span->size(); DwarfSpan a_span(*span); a_span.read_null_terminated_string(ok); if (!*ok) { return 0; } return before - a_span.size(); } case DW_FORM_sec_offset: return dwarf_size; case DW_FORM_flag_present: return 0; case DW_FORM_implicit_const: return 0; case DW_FORM_rnglistx: return dwarf_size; case DW_FORM_strp_sup: return dwarf_size; case DW_FORM_GNU_strp_alt: return dwarf_size; default: LOG(warn) << "form " << form << " not supported!"; *ok = false; return 0; } } DwarfSpan DwarfDIE::find_attribute(DWAttr attr, DWForm* form, bool* ok) const { DwarfSpan span = attr_span; for (auto& a : abbrev->attributes) { size_t size = form_size(a.form, address_size, dwarf_size, &span, ok); DwarfSpan a_span = span.consume(size); if (a.name == attr) { *form = a.form; if (a.form == DW_FORM_implicit_const) { a_span = a.constant; } return a_span; } } return DwarfSpan(); } static uint64_t decode_unsigned_literal(DwarfSpan span, bool* ok) { int shift = 0; uint64_t ret = 0; while (!span.empty()) { if (shift >= 64) { LOG(warn) << "Literal too large"; *ok = false; return 0; } ret |= (uint64_t)span.read_value(ok) << shift; shift += 8; } return ret; } static int64_t decode_section_ptr(DwarfSpan span, DWForm form, bool* ok) { switch (form) { case DW_FORM_data1: case DW_FORM_data2: case DW_FORM_data4: case DW_FORM_data8: case DW_FORM_sec_offset: { uint64_t ret = decode_unsigned_literal(span, ok); if (ret > INT64_MAX) { LOG(warn) << "section ptr out of range"; *ok = false; return 0; } return ret; } default: LOG(warn) << "Unknown section ptr form " << form; *ok = false; return 0; } } static uint64_t 
decode_unsigned(DwarfSpan span, DWForm form, bool* ok) { switch (form) { case DW_FORM_data1: case DW_FORM_data2: case DW_FORM_data4: case DW_FORM_data8: { return decode_unsigned_literal(span, ok); } case DW_FORM_udata: { return span.read_uleb(ok); } default: LOG(warn) << "Unknown unsigned form " << form; *ok = false; return 0; } } static const char* decode_string(const DwarfCompilationUnit& cu, DwarfSpan span, DWForm form, const DebugStrSpans& debug_strs, bool* ok) { switch (form) { case DW_FORM_strp: { uint64_t offset = decode_unsigned_literal(span, ok); if (!*ok) { return nullptr; } return debug_strs.debug_str.subspan(offset).read_null_terminated_string(ok); } case DW_FORM_strp_sup: case DW_FORM_GNU_strp_alt: { uint64_t offset = decode_unsigned_literal(span, ok); if (!*ok) { return nullptr; } return debug_strs.debug_str_sup.subspan(offset).read_null_terminated_string(ok); } case DW_FORM_line_strp: { uint64_t offset = decode_unsigned_literal(span, ok); if (!*ok) { return nullptr; } return debug_strs.debug_line_str.subspan(offset).read_null_terminated_string(ok); } case DW_FORM_strx: case DW_FORM_strx1: case DW_FORM_strx2: case DW_FORM_strx3: case DW_FORM_strx4: { uint64_t index = decode_unsigned_literal(span, ok) * cu.entry_size() + cu.str_offsets_base(); if (!*ok) { return nullptr; } uint64_t offset = cu.read_entry_sized_value(debug_strs.debug_str_offsets.subspan(index), ok); if (!*ok) { return nullptr; } return debug_strs.debug_str.subspan(offset).read_null_terminated_string(ok); } case DW_FORM_string: return span.read_null_terminated_string(ok); default: LOG(warn) << "Unknown string form " << form; *ok = false; return 0; } } int64_t DwarfDIE::section_ptr_attr(DWAttr attr, bool* ok) const { DWForm form; auto span = find_attribute(attr, &form, ok); if (span.empty() || !*ok) { return -1; } return decode_section_ptr(span, form, ok); } uint64_t DwarfDIE::unsigned_attr(DWAttr attr, bool* found, bool* ok) const { DWForm form; auto span = find_attribute(attr, &form, ok); if (span.empty() || !*ok) { *found = false; return 0; } *found = true; return decode_unsigned(span, form, ok); } const char* DwarfDIE::string_attr(const DwarfCompilationUnit& cu, DWAttr attr, const DebugStrSpans& debug_strs, bool* ok) const { DWForm form; auto span = find_attribute(attr, &form, ok); if (span.empty() || !*ok) { return nullptr; } return decode_string(cu, span, form, debug_strs, ok); } DwarfCompilationUnit DwarfCompilationUnit::next(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok) { DwarfCompilationUnit ret; uint32_t word = DwarfSpan(*debug_info).read_value(ok); if (!*ok) { return ret; } if (word == 0xFFFFFFFF) { ret.init_size(debug_info, abbrevs, ok); } else { ret.init_size(debug_info, abbrevs, ok); } return ret; } template void DwarfCompilationUnit::init_size(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok) { auto h = DwarfSpan(*debug_info).read>(ok); if (!*ok) { return; } if (2 <= h->version && h->version <= 4) { init>(debug_info, abbrevs, ok); } else if (h->version == 5) { auto hh = DwarfSpan(*debug_info).read>(ok); if (!*ok) { return; } if (hh->unit_type == DW_UT_skeleton || hh->unit_type == DW_UT_split_compile) { init>(debug_info, abbrevs, ok); } else { init>(debug_info, abbrevs, ok); } } else { LOG(warn) << "Unknown compilation unit version " << h->version; *ok = false; } } template void DwarfCompilationUnit::init(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok) { DwarfSpan span(*debug_info); auto h = span.read(ok); if (!*ok) { return; } uint64_t length = h->preamble.unit_length; if 
(length >= UINT64_MAX - 12) { LOG(warn) << "Invalid CU length"; *ok = false; return; } debug_info->consume(length + sizeof(h->preamble)); DwarfAbbrevSet& abbrev_set = abbrevs.lookup(h->debug_abbrev_offset); die_ = make_unique(span, abbrev_set, sizeof(typename H::Size::Offset), h->address_size, ok); if (!*ok) { return; } if (die_->tag() != DW_TAG_compile_unit && die_->tag() != DW_TAG_partial_unit && die_->tag() != DW_TAG_skeleton_unit) { LOG(warn) << "CU DIE is not DW_TAG_compilation_unit/DW_TAG_partial_unit/DW_TAG_skeleton_unit!"; *ok = false; return; } entry_size_ = H::Size::EntrySize; h->install_dwo_id(this); } uint64_t DwarfCompilationUnit::read_entry_sized_value(DwarfSpan span, bool* ok) const { if (entry_size() == 4) { return span.read_value(ok); } else if (entry_size() == 8) { return span.read_value(ok); } else { LOG(warn) << "Unknown entry size " << entry_size(); *ok = false; return 0; } } DwarfLineNumberTable::DwarfLineNumberTable(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, bool* ok) { uint32_t word = DwarfSpan(span).read_value(ok); if (!*ok) { return; } if (word == 0xFFFFFFFF) { init_size(cu, span, debug_str, ok); } else { init_size(cu, span, debug_str, ok); } } template void DwarfLineNumberTable::init_size(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, bool* ok) { auto h = DwarfSpan(span).read>(ok); if (!*ok) { return; } if (2 <= h->version && h->version <= 3) { init>(cu, span, debug_str, ok); } else if (h->version == 4) { init>(cu, span, debug_str, ok); } else if (h->version == 5) { init>(cu, span, debug_str, ok); } else { LOG(warn) << "Unknown compilation unit version " << h->version; *ok = false; } } static bool read_dwarf2_directories(DwarfSpan span, std::vector& directories, std::vector& files) { bool ok = true; directories.push_back(nullptr); while (true) { const char* dir = span.read_null_terminated_string(&ok); if (!ok) { return ok; } if (!*dir) { break; } directories.push_back(dir); } files.push_back({ 0, nullptr }); while (true) { const char* file = span.read_null_terminated_string(&ok); if (!ok) { return ok; } if (!*file) { break; } uint64_t dir = span.read_uleb(&ok); if (dir >= directories.size()) { LOG(warn) << "Invalid directory index, bailing"; return false; } span.read_uleb(&ok); // timestamp span.read_uleb(&ok); // length if (!ok) { return ok; } files.push_back({ dir, file }); } return ok; } template bool Dwarf2LineNumberTableHeader::read_directories(const DwarfCompilationUnit&, DwarfSpan span, const DebugStrSpans&, std::vector& directories, std::vector& files) const { return read_dwarf2_directories(span, directories, files); } template bool Dwarf4LineNumberTableHeader::read_directories(const DwarfCompilationUnit&, DwarfSpan span, const DebugStrSpans&, std::vector& directories, std::vector& files) const { return read_dwarf2_directories(span, directories, files); } struct FileEntryFormat { DWLnct content_type; DWForm form; }; template bool Dwarf5LineNumberTableHeader::read_directories(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, std::vector& directories, std::vector& files) const { bool ok = true; uint64_t directory_entry_format_count = span.read_uleb(&ok); if (!ok) { return ok; } bool seen_lnct_path = false; std::vector directory_formats; for (uint64_t i = 0; i < directory_entry_format_count; ++i) { DWLnct content_type = (DWLnct)span.read_uleb(&ok); if (!ok) { return ok; } if (content_type == DW_LNCT_path) { if (seen_lnct_path) { LOG(warn) << "DW_LNCT_path 
appears twice in directories!"; return false; } seen_lnct_path = true; } DWForm form = (DWForm)span.read_uleb(&ok); if (!ok) { return ok; } directory_formats.push_back({ content_type, form }); } if (!seen_lnct_path) { LOG(warn) << "DW_LNCT_path does not appear in directories"; return false; } uint64_t directories_count = span.read_uleb(&ok); if (!ok) { return ok; } for (uint64_t i = 0; i < directories_count; ++i) { for (auto format: directory_formats) { switch (format.content_type) { case DW_LNCT_path: { size_t size = form_size(format.form, address_size, Size::EntrySize, &span, &ok); DwarfSpan a_span = span.consume(size); auto directory = decode_string(cu, a_span, format.form, debug_str, &ok); if (!ok) { return ok; } directories.push_back(directory); break; } default: LOG(warn) << "Unknown DW_LNCT " << format.content_type << " for directory"; return false; } } } uint64_t file_entry_format_count = span.read_uleb(&ok); if (!ok) { return ok; } seen_lnct_path = false; std::vector file_formats; for (uint64_t i = 0; i < file_entry_format_count; ++i) { DWLnct content_type = (DWLnct)span.read_uleb(&ok); if (!ok) { return ok; } if (content_type == DW_LNCT_path) { if (seen_lnct_path) { LOG(warn) << "DW_LNCT_path appears twice in files!"; return false; } seen_lnct_path = true; } DWForm form = (DWForm)span.read_uleb(&ok); if (!ok) { return ok; } file_formats.push_back({ content_type, form }); } if (!seen_lnct_path) { LOG(warn) << "DW_LNCT_path does not appear in files"; return false; } uint64_t files_count = span.read_uleb(&ok); if (!ok) { return ok; } for (uint64_t i = 0; i < files_count; ++i) { uint64_t directory_index = 0; const char* file_path = NULL; for (auto format: file_formats) { switch (format.content_type) { case DW_LNCT_path: { size_t size = form_size(format.form, address_size, Size::EntrySize, &span, &ok); DwarfSpan a_span = span.consume(size); file_path = decode_string(cu, a_span, format.form, debug_str, &ok); if (!ok) { return ok; } break; } case DW_LNCT_directory_index: { size_t size = form_size(format.form, address_size, Size::EntrySize, &span, &ok); DwarfSpan a_span = span.consume(size); directory_index = decode_unsigned(a_span, format.form, &ok); if (!ok) { return ok; } break; } case DW_LNCT_md5: { if (format.form != DW_FORM_data16) { LOG(warn) << "md5 has unexpected form " << format.form; return false; } size_t size = form_size(format.form, address_size, Size::EntrySize, &span, &ok); span.consume(size); break; } default: LOG(warn) << "Unknown DW_LNCT " << format.content_type << " for file"; return false; } } files.push_back({ directory_index, file_path }); } return true; } template void DwarfLineNumberTable::init(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_str, bool* ok) { auto h = span.read(ok); if (!*ok) { return; } for (uint8_t i = 1; i < h->opcode_base; ++i) { span.read_uleb(ok); } if (!*ok) { return; } *ok = h->read_directories(cu, span, debug_str, directories_, file_names_); } } // namespace rr rr-5.7.0/src/Dwarf.h000066400000000000000000000144641450675474200142000ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_DWARF_H_ #define RR_DWARF_H_ #include #include #include #include #include "cpp_supplement.h" namespace rr { enum DWTag { DW_TAG_null = 0, DW_TAG_compile_unit = 0x11, DW_TAG_partial_unit = 0x3c, DW_TAG_skeleton_unit = 0x4a, }; enum DWAttr { DW_AT_name = 0x03, DW_AT_stmt_list = 0x10, DW_AT_comp_dir = 0x1b, DW_AT_str_offsets_base = 0x72, DW_AT_dwo_name = 0x76, 
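  // Pre-DWARF5 GNU split-debug ("Fission") extensions: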
DW_AT_GNU_dwo_name = 0x2130, DW_AT_GNU_dwo_id = 0x2131, }; enum DWChildren { DW_CHILDREN_no = 0x00, DW_CHILDREN_yes = 0x01 }; enum DWForm { DW_FORM_addr = 0x01, DW_FORM_block2 = 0x03, DW_FORM_block4 = 0x04, DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06, DW_FORM_data8 = 0x07, DW_FORM_string = 0x08, DW_FORM_data1 = 0x0b, DW_FORM_flag = 0x0c, DW_FORM_strp = 0x0e, DW_FORM_udata= 0x0f, DW_FORM_indirect = 0x16, DW_FORM_sec_offset = 0x17, DW_FORM_flag_present = 0x19, DW_FORM_strx = 0x1a, DW_FORM_addrx = 0x1b, DW_FORM_strp_sup = 0x1d, DW_FORM_data16 = 0x1e, DW_FORM_line_strp = 0x1f, DW_FORM_implicit_const = 0x21, DW_FORM_rnglistx = 0x23, DW_FORM_strx1 = 0x25, DW_FORM_strx2 = 0x26, DW_FORM_strx3 = 0x27, DW_FORM_strx4 = 0x28, DW_FORM_GNU_strp_alt = 0x1f21, }; enum DWLnct { DW_LNCT_path = 0x1, DW_LNCT_directory_index = 0x2, DW_LNCT_md5 = 0x5, }; enum DWUt { DW_UT_compile = 0x01, DW_UT_skeleton = 0x04, DW_UT_split_compile = 0x05, }; class DwarfSpan { public: DwarfSpan(const uint8_t* start, const uint8_t* end) : start(start), end(end) {} DwarfSpan(const DwarfSpan& other) = default; DwarfSpan& operator=(const DwarfSpan& other) = default; DwarfSpan() : start(nullptr), end(nullptr) {} size_t size() const { return end - start; } uint64_t read_uleb(bool* ok); DwarfSpan read_leb_ref(bool* ok); const char* read_null_terminated_string(bool* ok); template const T* read(bool *ok) { if (size() < sizeof(T)) { *ok = false; return nullptr; } auto ret = reinterpret_cast(start); start += sizeof(T); return ret; } template T read_value(bool *ok) { const T* r = read(ok); return r ? *r : T(); } bool empty() { return start == end; } DwarfSpan subspan(uint64_t offset, uint64_t sz = UINT64_MAX) const { DwarfSpan ret(*this); if (size() <= offset) { ret.start = end; return ret; } ret.start += offset; if (ret.size() <= sz) { return ret; } ret.end = ret.start + sz; return ret; } DwarfSpan consume(uint64_t sz) { DwarfSpan ret(*this); if (size() <= sz) { start = end; return ret; } ret.end = ret.start + sz; start = ret.end; return ret; } private: const uint8_t* start; const uint8_t* end; }; struct DebugStrSpans { DwarfSpan debug_str; DwarfSpan debug_str_sup; DwarfSpan debug_str_offsets; DwarfSpan debug_line_str; }; struct DwarfAbbrevAttribute { uint64_t name; DWForm form; DwarfSpan constant; // DWARF5 }; struct DwarfAbbrev { DwarfAbbrev() : tag(DW_TAG_null), children(DW_CHILDREN_no) {} std::vector attributes; DWTag tag; DWChildren children; }; class DwarfAbbrevSet { public: DwarfAbbrevSet(DwarfSpan span) : remaining_span(span) {} DwarfAbbrev* lookup(uint64_t code); private: std::unordered_map> abbrevs; DwarfSpan remaining_span; }; class DwarfAbbrevs { public: DwarfAbbrevs(DwarfSpan debug_abbrev) : debug_abbrev(debug_abbrev) {} DwarfAbbrevSet& lookup(uint64_t offset); private: DwarfSpan debug_abbrev; std::unordered_map> abbrevs; }; class DwarfCompilationUnit; class DwarfDIE { public: DwarfDIE(DwarfSpan span, DwarfAbbrevSet& abbrevs, uint8_t dwarf_size, uint8_t address_size, bool* ok); DWTag tag() const { return abbrev->tag; } // Returns empty span if not found DwarfSpan find_attribute(DWAttr attr, DWForm* form, bool* ok) const; // Returns -1 if no attr int64_t section_ptr_attr(DWAttr attr, bool* ok) const; // Sets *found to false if not found. 
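  // (Aside, ours: many scalar attribute payloads here are ULEB128-encoded --
  // little-endian 7-bit groups with the high bit set on every byte but the
  // last, as decoded by DwarfSpan::read_uleb(). A standard worked example:
  // the bytes { 0xE5, 0x8E, 0x26 } decode as
  //   0x65 | (0x0E << 7) | (0x26 << 14) = 624485.
  // The "sets *found to false" convention above belongs to unsigned_attr():)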
uint64_t unsigned_attr(DWAttr attr, bool* found, bool* ok) const; // Returns nullptr if no attr const char* string_attr(const DwarfCompilationUnit& unit, DWAttr attr, const DebugStrSpans& debug_str, bool* ok) const; private: DwarfAbbrev* abbrev; DwarfSpan attr_span; uint8_t address_size; uint8_t dwarf_size; }; class DwarfCompilationUnit { public: // Consumes debug_info span and leaves rest behind static DwarfCompilationUnit next(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok); const DwarfDIE& die() const { return *die_; } uint64_t dwo_id() const { return dwo_id_; } void set_dwo_id(uint64_t dwo_id) { dwo_id_ = dwo_id; } uint64_t str_offsets_base() const { return str_offsets_base_; } void set_str_offsets_base(uint64_t str_offsets_base) { str_offsets_base_ = str_offsets_base; } uint8_t entry_size() const { return entry_size_; } uint64_t read_entry_sized_value(DwarfSpan span, bool* ok) const; private: DwarfCompilationUnit() {} template void init_size(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok); template void init(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok); std::unique_ptr die_; uint64_t dwo_id_; uint64_t str_offsets_base_; uint8_t entry_size_; }; struct DwarfSourceFile { uint64_t directory_index; const char* file_name; }; class DwarfLineNumberTable { public: DwarfLineNumberTable(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_strs, bool* ok); // Null directory pointer means "compilation dir". The first entry is null. const std::vector& directories() const { return directories_; } // Null file name means "compilation unit name". The first entry is null. const std::vector& file_names() const { return file_names_; } private: template void init_size(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_strs, bool* ok); template void init(const DwarfCompilationUnit& cu, DwarfSpan span, const DebugStrSpans& debug_strs, bool* ok); std::vector directories_; std::vector file_names_; }; } // namespace rr #endif /* RR_DWARF_H_ */ rr-5.7.0/src/ElfReader.cc000066400000000000000000000460751450675474200151270ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "ElfReader.h" #include #include #include #include #include #include "log.h" #include "util.h" using namespace std; namespace rr { class ElfReaderImplBase { public: ElfReaderImplBase(ElfReader& r) : r(r), ok_(false) {} virtual ~ElfReaderImplBase() {} virtual SymbolTable read_symbols(const char* symtab, const char* strtab) = 0; virtual DynamicSection read_dynamic() = 0; virtual Debuglink read_debuglink() = 0; virtual Debugaltlink read_debugaltlink() = 0; virtual string read_buildid() = 0; virtual string read_interp() = 0; virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) = 0; virtual SectionOffsets find_section_file_offsets(const char* name) = 0; virtual const vector* decompress_section(SectionOffsets offsets) = 0; bool ok() { return ok_; } protected: ElfReader& r; vector>> decompressed_sections; bool ok_; }; template class ElfReaderImpl : public ElfReaderImplBase { public: ElfReaderImpl(ElfReader& r); virtual SymbolTable read_symbols(const char* symtab, const char* strtab) override; virtual DynamicSection read_dynamic() override; virtual Debuglink read_debuglink() override; virtual Debugaltlink read_debugaltlink() override; virtual string read_buildid() override; virtual string read_interp() override; virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) override; virtual SectionOffsets 
find_section_file_offsets(const char* name) override; virtual const vector* decompress_section(SectionOffsets offsets) override; private: const typename Arch::ElfShdr* find_section(const char* n); const typename Arch::ElfPhdr* find_programheader(uint32_t pt); const typename Arch::ElfEhdr* elfheader; const typename Arch::ElfPhdr* programheader; const typename Arch::ElfShdr* sections; size_t programheader_size; size_t sections_size; vector section_names; }; template unique_ptr elf_reader_impl_arch(ElfReader& r) { return unique_ptr(new ElfReaderImpl(r)); } unique_ptr elf_reader_impl(ElfReader& r, SupportedArch arch) { RR_ARCH_FUNCTION(elf_reader_impl_arch, arch, r); } template ElfReaderImpl::ElfReaderImpl(ElfReader& r) : ElfReaderImplBase(r) { elfheader = r.read(0); if (!elfheader || memcmp(elfheader, ELFMAG, SELFMAG) != 0 || elfheader->e_ident[EI_CLASS] != Arch::elfclass || elfheader->e_ident[EI_DATA] != Arch::elfendian || elfheader->e_machine != Arch::elfmachine || elfheader->e_shentsize != sizeof(typename Arch::ElfShdr) || elfheader->e_phentsize != sizeof(typename Arch::ElfPhdr) || elfheader->e_shstrndx >= elfheader->e_shnum) { LOG(debug) << "Invalid ELF file: invalid header"; return; } programheader = r.read(elfheader->e_phoff, elfheader->e_phnum); if (!programheader || !elfheader->e_phnum) { LOG(debug) << "Invalid ELF file: no program headers"; return; } programheader_size = elfheader->e_phnum; sections = r.read(elfheader->e_shoff, elfheader->e_shnum); if (!sections || !elfheader->e_shnum) { LOG(debug) << "Invalid ELF file: no sections"; return; } sections_size = elfheader->e_shnum; auto& section_names_section = sections[elfheader->e_shstrndx]; const char* section_names_ptr = r.read(section_names_section.sh_offset, section_names_section.sh_size); if (!section_names_ptr || !section_names_section.sh_size) { LOG(debug) << "Invalid ELF file: can't read section names"; return; } // Ensure final 0 section_names.resize(section_names_section.sh_size); memcpy(section_names.data(), section_names_ptr, section_names.size()); section_names[section_names.size() - 1] = 0; ok_ = true; } template const typename Arch::ElfPhdr* ElfReaderImpl::find_programheader(uint32_t pt) { const typename Arch::ElfPhdr* ph = nullptr; for (size_t i = 0; i < programheader_size; ++i) { auto& p = programheader[i]; if (p.p_type == pt) { ph = &p; } } if (!ph) { LOG(debug) << "Missing program header " << pt; } return ph; } template const typename Arch::ElfShdr* ElfReaderImpl::find_section(const char* n) { const typename Arch::ElfShdr* section = nullptr; for (size_t i = 0; i < sections_size; ++i) { auto& s = sections[i]; if (s.sh_name >= section_names.size()) { LOG(debug) << "Invalid ELF file: invalid name offset for section " << i; continue; } const char* name = section_names.data() + s.sh_name; if (strcmp(name, n) == 0) { if (section) { LOG(debug) << "Invalid ELF file: duplicate symbol section " << n; return nullptr; } section = &s; } } if (!section) { LOG(debug) << "Missing section " << n; } return section; } template SectionOffsets ElfReaderImpl::find_section_file_offsets( const char* name) { SectionOffsets offsets = { 0, 0, false }; const typename Arch::ElfShdr* section = find_section(name); if (!section) { return offsets; } offsets.start = section->sh_offset; offsets.end = section->sh_offset + section->sh_size; offsets.compressed = !!(section->sh_flags & SHF_COMPRESSED); return offsets; } template const vector* ElfReaderImpl::decompress_section(SectionOffsets offsets) { DEBUG_ASSERT(offsets.compressed); auto hdr = 
r.read(offsets.start); if (!hdr) { LOG(warn) << "section at " << offsets.start << " is marked compressed but is too small"; return nullptr; } size_t decompressed_size = 0; if (hdr->ch_type == ELFCOMPRESS_ZLIB) { decompressed_size = hdr->ch_size; offsets.start += sizeof(typename Arch::ElfChdr); } else { auto legacy_hdr = r.read_bytes(offsets.start, 4); if (!memcmp("ZLIB", legacy_hdr, 4)) { auto be_size = r.read(offsets.start + 4); decompressed_size = be64toh(*be_size); offsets.start += 12; } else { LOG(warn) << "section at " << offsets.start << " is marked compressed but uses unrecognized" << " type " << HEX(hdr->ch_type); return nullptr; } } unique_ptr> v(new vector()); v->resize(decompressed_size); z_stream stream; memset(&stream, 0, sizeof(stream)); int result = inflateInit(&stream); if (result != Z_OK) { FATAL() << "inflateInit failed!"; return nullptr; } stream.avail_in = offsets.end - offsets.start; stream.next_in = (unsigned char*)r.read_bytes(offsets.start, stream.avail_in); stream.next_out = &v->front(); stream.avail_out = v->size(); result = inflate(&stream, Z_FINISH); if (result != Z_STREAM_END) { FATAL() << "inflate failed!"; return nullptr; } result = inflateEnd(&stream); if (result != Z_OK) { FATAL() << "inflateEnd failed!"; return nullptr; } decompressed_sections.push_back(std::move(v)); return decompressed_sections.back().get(); } template SymbolTable ElfReaderImpl::read_symbols(const char* symtab, const char* strtab) { SymbolTable result; if (!ok()) { return result; } const typename Arch::ElfShdr* symbols = find_section(symtab); if (!symbols) { return result; } const typename Arch::ElfShdr* strings = find_section(strtab); if (!strings) { return result; } if (symbols->sh_entsize != sizeof(typename Arch::ElfSym)) { LOG(debug) << "Invalid ELF file: incorrect symbol size " << symbols->sh_entsize; return result; } if (symbols->sh_size % symbols->sh_entsize) { LOG(debug) << "Invalid ELF file: incorrect symbol section size " << symbols->sh_size; return result; } if (strings->sh_size == 0) { LOG(debug) << "Invalid ELF file: empty string table"; return result; } size_t symbol_list_size = symbols->sh_size / symbols->sh_entsize; auto symbol_list = r.read( symbols->sh_offset, symbol_list_size); if (!symbol_list) { LOG(debug) << "Invalid ELF file: can't read symbols " << symtab; return result; } auto strtab_ptr = r.read(strings->sh_offset, strings->sh_size); if (!strtab_ptr) { LOG(debug) << "Invalid ELF file: can't read strings " << strtab; return result; } result.strtab.resize(strings->sh_size); memcpy(result.strtab.data(), strtab_ptr, result.strtab.size()); result.strtab[result.strtab.size() - 1] = 0; result.symbols.resize(symbol_list_size); for (size_t i = 0; i < symbol_list_size; ++i) { auto& s = symbol_list[i]; if (s.st_shndx >= sections_size) { // Don't leave this entry uninitialized result.symbols[i] = SymbolTable::Symbol(0, 0); continue; } result.symbols[i] = SymbolTable::Symbol(s.st_value, s.st_name); } return result; } template DynamicSection ElfReaderImpl::read_dynamic() { DynamicSection result; if (!ok()) { return result; } const typename Arch::ElfShdr* dynamic = find_section(".dynamic"); if (!dynamic) { return result; } const typename Arch::ElfShdr* dynstr = find_section(".dynstr"); if (!dynstr) { return result; } if (dynamic->sh_entsize != sizeof(typename Arch::ElfDyn)) { LOG(debug) << "Invalid ELF file: incorrect .dynamic size " << dynamic->sh_entsize; return result; } if (!dynamic->sh_size) { return result; } if (dynamic->sh_size % dynamic->sh_entsize) { LOG(debug) << 
"Invalid ELF file: incorrect .dynamic section size " << dynamic->sh_size; return result; } if (dynstr->sh_size == 0) { LOG(debug) << "Invalid ELF file: empty string table"; return result; } size_t dyn_list_size = dynamic->sh_size / dynamic->sh_entsize; auto dyn_list = r.read( dynamic->sh_offset, dyn_list_size); if (!dyn_list) { LOG(debug) << "Invalid ELF file: can't read .dynamic"; return result; } auto strtab = r.read(dynstr->sh_offset, dynstr->sh_size); if (!strtab) { LOG(debug) << "Invalid ELF file: can't read .dynstr"; return result; } result.strtab.resize(dynstr->sh_size); memcpy(result.strtab.data(), strtab, result.strtab.size()); result.strtab[result.strtab.size() - 1] = 0; result.entries.resize(dyn_list_size); for (size_t i = 0; i < dyn_list_size; ++i) { auto& s = dyn_list[i]; result.entries[i] = DynamicSection::Entry(s.d_tag, s.d_val); } return result; } static bool null_terminated(const char* p, size_t size, string& out) { size_t len = strnlen(p, size); if (len == size) { LOG(warn) << "Invalid file name"; return false; } out = string(p, len); return true; } template Debuglink ElfReaderImpl::read_debuglink() { Debuglink result; if (!ok()) { return result; } const typename Arch::ElfShdr* debuglink = find_section(".gnu_debuglink"); if (!debuglink) { return result; } if (debuglink->sh_size < 8) { LOG(warn) << "Invalid ELF file: unexpected .gnu_debuglink length"; return result; } size_t crc_offset = debuglink->sh_size - 4; if (!r.read_into(debuglink->sh_offset + crc_offset, &result.crc)) { LOG(warn) << "Invalid ELF file: can't read .gnu_debuglink crc checksum"; return result; } const char* file_name = r.read(debuglink->sh_offset, crc_offset); if (!file_name) { LOG(warn) << "Invalid ELF file: can't read .gnu_debuglink file_name"; return result; } null_terminated(file_name, crc_offset, result.file_name); return result; } template Debugaltlink ElfReaderImpl::read_debugaltlink() { Debugaltlink result; if (!ok()) { return result; } const typename Arch::ElfShdr* debuglink = find_section(".gnu_debugaltlink"); if (!debuglink) { return result; } // Last 20 bytes are the build ID of the target file. Ignore for now. if (debuglink->sh_size < 21) { LOG(warn) << "Invalid ELF file: unexpected .gnu_debugaltlink length"; return result; } size_t build_id_offset = debuglink->sh_size - 20; const char* file_name = r.read(debuglink->sh_offset, build_id_offset); if (!file_name) { LOG(warn) << "Invalid ELF file: can't read .gnu_debugaltlink file_name"; return result; } null_terminated(file_name, build_id_offset, result.file_name); return result; } template string ElfReaderImpl::read_buildid() { string result; if (!ok()) { return result; } for (size_t i = 0; i < sections_size; ++i) { auto& s = sections[i]; if (s.sh_type != SHT_NOTE) { continue; } auto offset = s.sh_offset; auto nhdr = r.read(offset); if (!nhdr) { LOG(error) << "Failed to read ELF note"; return result; } offset += sizeof(*nhdr); char name[4] = { 0 }; if (!(nhdr->n_namesz == 4 && r.read_into(offset, &name) && memcmp("GNU", name, 4) == 0 && nhdr->n_descsz > 0)) { continue; } // Note members are 4 byte aligned, twiddle bits to round up if necessary. 
offset += (nhdr->n_namesz + 3) & ~0x3; if (nhdr->n_type != NT_GNU_BUILD_ID) { continue; } const uint8_t* id = r.read<uint8_t>(offset, nhdr->n_descsz); if (!id) { LOG(error) << "Failed to read ELF note contents"; return result; } result.reserve(nhdr->n_descsz); for (unsigned i = 0; i < nhdr->n_descsz; ++i) { char byte[3] = { 0 }; snprintf(&byte[0], 3, "%02x", id[i]); result.append(byte); } break; } return result; } template <typename Arch> string ElfReaderImpl<Arch>::read_interp() { string result; if (!ok()) { return result; } const typename Arch::ElfPhdr* ph = find_programheader(PT_INTERP); if (!ph) { return result; } const char* file_name = r.read<char>(ph->p_offset, ph->p_filesz); if (!file_name) { LOG(warn) << "Invalid ELF file: can't read PT_INTERP"; return result; } null_terminated(file_name, ph->p_filesz, result); return result; } template <typename Arch> bool ElfReaderImpl<Arch>::addr_to_offset(uintptr_t addr, uintptr_t& offset) { for (size_t i = 0; i < sections_size; ++i) { const auto& section = sections[i]; // Skip the section if it either "occupies no space in the file" or // doesn't have a valid address because it does not "occupy memory // during process execution". if (section.sh_type == SHT_NOBITS || !(section.sh_flags & SHF_ALLOC)) { continue; } if (addr >= section.sh_addr && addr - section.sh_addr < section.sh_size) { offset = addr - section.sh_addr + section.sh_offset; return true; } } return false; } ElfReader::ElfReader(SupportedArch arch) : arch_(arch), map(nullptr), size(0) {} ElfReader::~ElfReader() {} ElfReaderImplBase& ElfReader::impl() { if (!impl_) { impl_ = elf_reader_impl(*this, arch_); } return *impl_; } SymbolTable ElfReader::read_symbols(const char* symtab, const char* strtab) { return impl().read_symbols(symtab, strtab); } DynamicSection ElfReader::read_dynamic() { return impl().read_dynamic(); } Debuglink ElfReader::read_debuglink() { return impl().read_debuglink(); } Debugaltlink ElfReader::read_debugaltlink() { return impl().read_debugaltlink(); } SectionOffsets ElfReader::find_section_file_offsets(const char* name) { return impl().find_section_file_offsets(name); } DwarfSpan ElfReader::dwarf_section(const char* name, bool known_to_be_compressed) { SectionOffsets offsets = impl().find_section_file_offsets(name); offsets.compressed |= known_to_be_compressed; if (offsets.start && offsets.compressed) { auto decompressed = impl().decompress_section(offsets); return DwarfSpan(decompressed->data(), decompressed->data() + decompressed->size()); } return DwarfSpan(map + offsets.start, map + offsets.end); } string ElfReader::read_buildid() { return impl().read_buildid(); } string ElfReader::read_interp() { return impl().read_interp(); } bool ElfReader::addr_to_offset(uintptr_t addr, uintptr_t& offset) { return impl().addr_to_offset(addr, offset); } bool ElfReader::ok() { return impl().ok(); } ElfFileReader::ElfFileReader(ScopedFd& fd, SupportedArch arch) : ElfReader(arch) { struct stat st; if (fstat(fd, &st) < 0) { FATAL() << "Can't stat fd"; } if (st.st_size > 0) { map = static_cast<uint8_t*>(mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); if (map == MAP_FAILED) { FATAL() << "Can't map fd"; } } size = st.st_size; } ElfFileReader::~ElfFileReader() { if (map) { munmap(map, size); } } ScopedFd ElfFileReader::open_debug_file(const std::string& elf_file_name) { if (elf_file_name.empty() || elf_file_name[0] != '/') { return ScopedFd(); } Debuglink debuglink = read_debuglink(); if (debuglink.file_name.empty()) { return ScopedFd(); } size_t last_slash = elf_file_name.find_last_of('/'); string debug_path = "/usr/lib/debug/"; debug_path += 
elf_file_name.substr(0, last_slash) + '/' + debuglink.file_name; ScopedFd debug_fd(debug_path.c_str(), O_RDONLY); if (!debug_fd.is_open()) { return ScopedFd(); } // Verify that the CRC checksum matches, in case the debuginfo and text file // are in separate packages that are out of sync. uint32_t crc = 0xffffffff; while (true) { unsigned char buf[4096]; ssize_t ret = ::read(debug_fd.get(), buf, sizeof(buf)); if (ret < 0) { if (errno != EINTR) { LOG(debug) << "Error reading " << debug_path; return ScopedFd(); } } else if (ret == 0) { break; } else { crc = crc32(crc, buf, ret); } } if ((crc ^ 0xffffffff) == debuglink.crc) { return debug_fd; } return ScopedFd(); } SupportedArch ElfFileReader::identify_arch(ScopedFd& fd) { /** * This code is quite lax. That's OK because this is only used to create * a specific ElfReaderImpl, which does much more thorough checking of the * header. */ static const int header_prefix_size = 20; char buf[header_prefix_size]; ssize_t ret = read_to_end(fd, 0, buf, sizeof(buf)); if (ret != (ssize_t)sizeof(buf) || buf[5] != 1) { return NativeArch::arch(); } switch (buf[18] | (buf[19] << 8)) { case 0x03: return x86; case 0x3e: return x86_64; default: return NativeArch::arch(); } } bool ElfFileReader::is_x32_abi(__attribute__((unused)) ScopedFd& fd) { #if defined(__x86_64__) static const int header_prefix_size = 20; char buf[header_prefix_size]; ssize_t ret = read_to_end(fd, 0, buf, sizeof(buf)); if (ret != (ssize_t)sizeof(buf) || buf[5] != 1) { // Who knows what this is. return false; } if ((buf[18] | (buf[19] << 8)) == 0x3e) { // x32 ABI programs declare themselves with the amd64 architecture but // only 4 byte wide pointers. return buf[4] == 1; } #endif return false; } } // namespace rr rr-5.7.0/src/ElfReader.h000066400000000000000000000067371450675474200147700ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_ELF_READER_H_ #define RR_ELF_READER_H_ #include <memory> #include <string> #include <vector> #include "Dwarf.h" #include "ScopedFd.h" #include "kernel_abi.h" namespace rr { class ElfReaderImplBase; class SymbolTable { public: const char* name(size_t i) const { size_t offset = symbols[i].name_index; return offset < strtab.size() ? 
&strtab[offset] : nullptr; } bool is_name(size_t i, const char* name) const { size_t offset = symbols[i].name_index; return offset < strtab.size() && strcmp(&strtab[offset], name) == 0; } uintptr_t addr(size_t i) const { return symbols[i].addr; } size_t size() const { return symbols.size(); } struct Symbol { Symbol(uintptr_t addr, size_t name_index) : addr(addr), name_index(name_index) {} Symbol() {} uintptr_t addr; size_t name_index; }; std::vector<Symbol> symbols; // Last character is always null std::vector<char> strtab; }; class DynamicSection { public: struct Entry { public: Entry(uint64_t tag, uint64_t val) : tag(tag), val(val) {} Entry() {} uint64_t tag; uint64_t val; }; std::vector<Entry> entries; // Last character is always null std::vector<char> strtab; }; class Debuglink { public: std::string file_name; uint32_t crc; }; class Debugaltlink { public: std::string file_name; }; struct SectionOffsets { uint64_t start; uint64_t end; bool compressed; }; class ElfReader { public: ElfReader(SupportedArch arch); virtual ~ElfReader(); const void* read_bytes(size_t offset, size_t size) { if (offset + size > this->size) { return nullptr; } return map + offset; } template <typename T> const T* read(size_t offset, size_t count = 1) { return static_cast<const T*>(read_bytes(offset, sizeof(T)*count)); } template <typename T> bool read_into(size_t offset, T* out) { auto r = read<T>(offset); if (!r) { return false; } memcpy(out, r, sizeof(*out)); return true; } bool ok(); SymbolTable read_symbols(const char* symtab, const char* strtab); DynamicSection read_dynamic(); Debuglink read_debuglink(); Debugaltlink read_debugaltlink(); std::string read_buildid(); std::string read_interp(); // Returns true and sets file |offset| if ELF address |addr| is mapped from // a section in the ELF file. Returns false if no section maps to // |addr|. |addr| is an address indicated by the ELF file, not its // relocated address in memory. bool addr_to_offset(uintptr_t addr, uintptr_t& offset); SectionOffsets find_section_file_offsets(const char* name); DwarfSpan dwarf_section(const char* name, bool known_to_be_compressed = false); SupportedArch arch() const { return arch_; } private: ElfReaderImplBase& impl(); std::unique_ptr<ElfReaderImplBase> impl_; SupportedArch arch_; protected: uint8_t* map; size_t size; }; class ElfFileReader : public ElfReader { public: ElfFileReader(ScopedFd& fd, SupportedArch arch); ElfFileReader(ScopedFd& fd) : ElfFileReader(fd, identify_arch(fd)) {} ~ElfFileReader(); // Finds and opens the debug file corresponding to this reader. // |elf_file_name| is the name of the file already opened by this reader. ScopedFd open_debug_file(const std::string& elf_file_name); static SupportedArch identify_arch(ScopedFd& fd); static bool is_x32_abi(ScopedFd& fd); }; } // namespace rr #endif /* RR_ELF_READER_H_ */ rr-5.7.0/src/EmuFs.cc000066400000000000000000000137371450675474200143120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "EmuFs.h" #include <algorithm> #include <sstream> #include <string> #include <sys/syscall.h> #include <sys/types.h> #include <unistd.h> #include "AddressSpace.h" #include "ReplaySession.h" #include "core.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" using namespace std; namespace rr { EmuFile::~EmuFile() { LOG(debug) << " EmuFs::~File(einode:" << inode_ << ")"; owner.destroyed_file(*this); } EmuFile::shr_ptr EmuFile::clone(EmuFs& owner) { auto f = EmuFile::create(owner, orig_path.c_str(), device(), inode(), size_); // We could try using FICLONE but tmpfs doesn't support that yet so let's just // not bother for now. 
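// (A hedged sketch of what that fast path could look like, assuming a kernel and
// filesystem where FICLONE from <linux/fs.h> works on these files:
//   if (ioctl(f->fd().get(), FICLONE, fd().get()) == 0) { return f; }
// with the byte-copy loop below kept as the fallback when the ioctl fails.)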
// Avoid copying holes. vector<uint8_t> buf; uint64_t offset = 0; while (offset < size_) { ssize_t ret = lseek(fd(), offset, SEEK_HOLE); if (ret < 0) { ret = size_; } else { if (uint64_t(ret) < offset) { FATAL() << "lseek returned hole before requested offset"; } } uint64_t hole = ret; // Copy data while (offset < hole) { loff_t off_in = offset; loff_t off_out = offset; ssize_t ncopied = syscall(NativeArch::copy_file_range, file.get(), &off_in, f->fd().get(), &off_out, hole - offset, 0); if (ncopied >= 0) { if (ncopied == 0) { FATAL() << "Didn't copy anything"; } offset += ncopied; continue; } ssize_t amount = min<uint64_t>(hole - offset, 4*1024*1024); buf.resize(amount); ret = pread64(fd(), buf.data(), amount, offset); if (ret <= 0) { FATAL() << "Couldn't read all the data"; } ssize_t written = pwrite_all_fallible(f->fd(), buf.data(), ret, offset); if (written < ret) { FATAL() << "Couldn't write all the data"; } offset += written; } if (offset < size_) { // Look for the end of the hole, if any ret = lseek(fd(), offset, SEEK_DATA); if (ret < 0) { if (errno != ENXIO) { FATAL() << "Couldn't find data"; } break; } if (uint64_t(ret) <= offset) { FATAL() << "Zero sized hole?"; } // Skip the hole offset = ret; } } return f; } string EmuFile::proc_path() const { stringstream ss; ss << "/proc/" << getpid() << "/fd/" << fd().get(); return ss.str(); } void EmuFile::update(dev_t device, ino_t inode, uint64_t size) { DEBUG_ASSERT(device_ == device && inode_ == inode); ensure_size(size); } void EmuFile::ensure_size(uint64_t size) { if (size_ < size) { resize_shmem_segment(file, size); size_ = size; } } std::string make_temp_name(const string& orig_path, dev_t orig_device, ino_t orig_inode) { stringstream name; name << "rr-emufs-" << getpid() << "-dev-" << orig_device << "-inode-" << orig_inode << "-" << orig_path; // The linux man page for memfd_create says the length limit for the name // argument is 249 bytes, evidently because it prepends "memfd:" to the // parameter before using it. 
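// (A hypothetical example: pid 1234 emulating /usr/lib/libfoo.so with device 2049
// and inode 131 would yield "rr-emufs-1234-dev-2049-inode-131-/usr/lib/libfoo.so".)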
return name.str().substr(0, 249); } /*static*/ EmuFile::shr_ptr EmuFile::create(EmuFs& owner, const string& orig_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size) { string real_name = make_temp_name(orig_path, orig_device, orig_inode); ScopedFd fd(open_memory_file(real_name)); if (!fd.is_open()) { FATAL() << "Failed to create shmem segment for " << real_name; } resize_shmem_segment(fd, orig_file_size); shr_ptr f(new EmuFile(owner, std::move(fd), orig_path, real_name, orig_device, orig_inode, orig_file_size)); LOG(debug) << "created emulated file for " << orig_path << " as " << real_name; return f; } EmuFile::EmuFile(EmuFs& owner, ScopedFd&& fd, const string& orig_path, const string& real_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size) : orig_path(orig_path), tmp_path(real_path), file(std::move(fd)), owner(owner), size_(orig_file_size), device_(orig_device), inode_(orig_inode) {} EmuFile::shr_ptr EmuFs::at(const KernelMapping& recorded_map) const { return files.at(FileId(recorded_map)).lock(); } bool EmuFs::has_file_for(const KernelMapping& recorded_map) const { return files.find(FileId(recorded_map)) != files.end(); } EmuFile::shr_ptr EmuFs::clone_file(EmuFile::shr_ptr file) { DEBUG_ASSERT(file); auto c = file->clone(*this); files[FileId(*file)] = c; return c; } EmuFile::shr_ptr EmuFs::get_or_create(const KernelMapping& recorded_km) { FileId id(recorded_km); auto it = files.find(id); uint64_t min_file_size = recorded_km.file_offset_bytes() + recorded_km.size(); if (it != files.end()) { it->second.lock()->update(recorded_km.device(), recorded_km.inode(), min_file_size); return it->second.lock(); } auto vf = EmuFile::create(*this, recorded_km.fsname(), recorded_km.device(), recorded_km.inode(), min_file_size); files[id] = vf; return vf; } EmuFile::shr_ptr EmuFs::find(dev_t device, ino_t inode) { FileId id(device, inode); auto it = files.find(id); if (it == files.end()) { return EmuFile::shr_ptr(); } return it->second.lock(); } void EmuFs::log() const { LOG(error) << "EmuFs " << this << " with " << files.size() << " files:"; for (auto& kv : files) { auto file = kv.second.lock(); LOG(error) << " " << file->emu_path(); } } /*static*/ EmuFs::shr_ptr EmuFs::create() { return shr_ptr(new EmuFs()); } EmuFs::EmuFs() {} FileId::FileId(const KernelMapping& recorded_map) : device(recorded_map.device()), inode(recorded_map.inode()) {} FileId::FileId(const EmuFile& emu_file) : device(emu_file.device()), inode(emu_file.inode()) {} } // namespace rr rr-5.7.0/src/EmuFs.h000066400000000000000000000132601450675474200141450ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EMUFS_H_ #define RR_EMUFS_H_ #include #include #include #include #include "ScopedFd.h" namespace rr { class AddressSpace; class EmuFs; class KernelMapping; class ReplaySession; class Session; class Task; /** * Implement an "emulated file system" consisting of files that were * mmap'd shared during recording. These files require special * treatment because (i) they were most likely modified during * recording, so (ii) the original file contents only exist as * snapshots in the trace, but (iii) all mappings of the file must * point at the same underlying resource, so that modifications are * seen by all mappees. * * The rr EmuFs creates "emulated files" in shared memory during * replay. Each efile is uniquely identified at a given event in the * trace by |(edev, einode)| (i.e., the recorded device ID and inode). 
* "What about inode recycling", you're probably thinking to yourself. * This scheme can cope with inode recycling, given a very important * assumption discussed below. * * Why is inode recycling not a problem? Assume that an mmap'd file * F_0 at trace time t_0 has the same (device, inode) ID as a * different file F_1 at trace time t_1. By definition, if the inode * ID was recycled in [t_0, t_1), then all references to F_0 must have * been dropped in that interval. A corollary of that is that all * memory mappings of F_0 must have been fully unmapped in the * interval. As per the first long comment in |gc()| below, an * emulated file can only be "live" during replay if some tracee still * has a mapping of it. Tracees' mappings of emulated files is a * subset of the ways they can create references to real files during * recording. Therefore the event during replay that drops the last * reference to the emulated F_0 must be a tracee unmapping of F_0. * * So as long as we GC emulated F_0 at the event of its fatal * unmapping, the lifetimes of emulated F_0 and emulated F_1 must be * disjoint. And F_0 being GC'd at that point is the important * assumption mentioned above. */ class EmuFile; struct FileId { FileId(const KernelMapping& recorded_map); FileId(const EmuFile& emu_file); FileId(dev_t device, ino_t inode) : device(device), inode(inode) {} bool operator==(const FileId& other) const { return (device == other.device && inode == other.inode); } bool operator<(const FileId& other) const { return device < other.device || (device == other.device && inode < other.inode); } dev_t device; ino_t inode; }; /** * A file within an EmuFs. The file is real, but it's mapped to file * ID that was recorded during replay. */ class EmuFile { public: typedef std::shared_ptr shr_ptr; ~EmuFile(); /** * Return the fd of the real file backing this. */ const ScopedFd& fd() const { return file; } /** * Return a pathname referring to the fd of this in this * tracer's address space. For example, "/proc/12345/fd/5". */ std::string proc_path() const; /** * Return the path of the original file from recording, the * one this is emulating. */ const std::string emu_path() const { return orig_path; } const std::string real_path() const { return tmp_path; } dev_t device() const { return device_; } ino_t inode() const { return inode_; } void ensure_size(uint64_t size); private: friend class EmuFs; EmuFile(EmuFs& owner, ScopedFd&& fd, const std::string& orig_path, const std::string& real_path, dev_t device, ino_t inode, uint64_t file_size); /** * Return a copy of this file. See |create()| for the meaning * of |fs_tag|. */ shr_ptr clone(EmuFs& owner); /** * Ensure that the emulated file is sized to match a later * stat() of it. */ void update(dev_t device, ino_t inode, uint64_t size); /** * Create a new emulated file for |orig_path| that will * emulate the recorded attributes |est|. |tag| is used to * uniquely identify this file among multiple EmuFs's that * might exist concurrently in this tracer process. */ static shr_ptr create(EmuFs& owner, const std::string& orig_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size); std::string orig_path; std::string tmp_path; ScopedFd file; EmuFs& owner; uint64_t size_; dev_t device_; ino_t inode_; EmuFile(const EmuFile&) = delete; EmuFile operator=(const EmuFile&) = delete; }; class EmuFs { public: typedef std::shared_ptr shr_ptr; /** * Return the EmuFile for |recorded_map|, which must exist or this won't * return. 
*/ EmuFile::shr_ptr at(const KernelMapping& recorded_map) const; bool has_file_for(const KernelMapping& recorded_map) const; EmuFile::shr_ptr clone_file(EmuFile::shr_ptr emu_file); /** * Return an emulated file representing the recorded shared mapping * |recorded_km|. */ EmuFile::shr_ptr get_or_create(const KernelMapping& recorded_km); /** * Return an already-existing emulated file for the given device/inode. * Returns null if not found. */ EmuFile::shr_ptr find(dev_t device, ino_t inode); /** * Dump information about this emufs to the "error" log. */ void log() const; size_t size() const { return files.size(); } /** Create and return a new emufs. */ static shr_ptr create(); void destroyed_file(EmuFile& emu_file) { files.erase(FileId(emu_file)); } private: EmuFs(); typedef std::map<FileId, std::weak_ptr<EmuFile>> FileMap; FileMap files; EmuFs(const EmuFs&) = delete; EmuFs& operator=(const EmuFs&) = delete; }; } // namespace rr #endif // RR_EMUFS_H rr-5.7.0/src/Event.cc000066400000000000000000000136521450675474200143510ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Event.h" #include <syscall.h> #include <sstream> #include <string> #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "util.h" using namespace std; namespace rr { Event::Event(const Event& o) : event_type(o.event_type) { switch (event_type) { case EV_DESCHED: new (&Desched()) DeschedEvent(o.Desched()); return; case EV_PATCH_SYSCALL: new (&PatchSyscall()) PatchSyscallEvent(o.PatchSyscall()); return; case EV_SCHED: new (&Sched()) SchedEvent(o.Sched()); return; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: new (&Signal()) SignalEvent(o.Signal()); return; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: new (&Syscall()) SyscallEvent(o.Syscall()); return; case EV_SYSCALLBUF_FLUSH: new (&SyscallbufFlush()) SyscallbufFlushEvent(o.SyscallbufFlush()); return; default: return; } } Event::~Event() { switch (event_type) { case EV_DESCHED: Desched().~DeschedEvent(); return; case EV_PATCH_SYSCALL: PatchSyscall().~PatchSyscallEvent(); return; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: Signal().~SignalEvent(); return; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: Syscall().~SyscallEvent(); return; case EV_SYSCALLBUF_FLUSH: SyscallbufFlush().~SyscallbufFlushEvent(); return; default: return; } } Event& Event::operator=(const Event& o) { if (this == &o) { return *this; } this->~Event(); new (this) Event(o); return *this; } bool Event::record_regs() const { switch (type()) { case EV_INSTRUCTION_TRAP: case EV_PATCH_SYSCALL: case EV_SCHED: case EV_SYSCALL: case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: return true; default: return false; } } bool Event::record_extra_regs() const { switch (type()) { case EV_SYSCALL: { const SyscallEvent& sys_ev = Syscall(); // sigreturn/rt_sigreturn restores register state // execve sets everything under the sun, and // pkey_alloc modifies the PKRU register. return sys_ev.state == EXITING_SYSCALL && (is_sigreturn(sys_ev.number, sys_ev.arch()) || sys_ev.is_exec() || is_pkey_alloc_syscall(sys_ev.number, sys_ev.arch())); } case EV_SIGNAL: // Record extra regs so we can deliver the signal at the // right time even when GP regs and ticks values are unchanged // but extra regs have changed. return true; case EV_SIGNAL_HANDLER: // entering a signal handler seems to clear FP/SSE regs, // so record these effects. 
return true; default: return false; } } bool Event::has_ticks_slop() const { switch (type()) { case EV_SYSCALLBUF_ABORT_COMMIT: case EV_SYSCALLBUF_FLUSH: case EV_SYSCALLBUF_RESET: case EV_DESCHED: case EV_GROW_MAP: return true; default: return false; } } bool Event::is_signal_event() const { switch (event_type) { case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: return true; default: return false; } } bool Event::is_syscall_event() const { switch (event_type) { case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: return true; default: return false; } } string Event::str() const { stringstream ss; ss << type_name(); switch (event_type) { case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: ss << ": " << signal_name(Signal().siginfo.si_signo) << "(" << (const char*)(Signal().deterministic == DETERMINISTIC_SIG ? "det" : "async") << ")"; break; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: ss << ": " << syscall_name(Syscall().number, Syscall().regs.arch()); break; default: // No auxiliary information. break; } return ss.str(); } void Event::transform(EventType new_type) { switch (event_type) { case EV_SIGNAL: DEBUG_ASSERT(EV_SIGNAL_DELIVERY == new_type); break; case EV_SIGNAL_DELIVERY: DEBUG_ASSERT(EV_SIGNAL_HANDLER == new_type); break; case EV_SYSCALL: DEBUG_ASSERT(EV_SYSCALL_INTERRUPTION == new_type); break; case EV_SYSCALL_INTERRUPTION: DEBUG_ASSERT(EV_SYSCALL == new_type); break; default: FATAL() << "Can't transform immutable " << *this << " into " << new_type; } event_type = new_type; } std::string Event::type_name() const { switch (event_type) { case EV_SENTINEL: return "(none)"; #define CASE(_t) \ case EV_##_t: \ return #_t CASE(EXIT); CASE(NOOP); CASE(SCHED); CASE(SECCOMP_TRAP); CASE(INSTRUCTION_TRAP); CASE(SYSCALLBUF_FLUSH); CASE(SYSCALLBUF_ABORT_COMMIT); CASE(SYSCALLBUF_RESET); CASE(PATCH_SYSCALL); CASE(GROW_MAP); CASE(DESCHED); CASE(SIGNAL); CASE(SIGNAL_DELIVERY); CASE(SIGNAL_HANDLER); CASE(SYSCALL); CASE(SYSCALL_INTERRUPTION); CASE(TRACE_TERMINATION); #undef CASE default: FATAL() << "Unknown event type " << event_type; return ""; // not reached } } const char* state_name(SyscallState state) { switch (state) { #define CASE(_id) \ case _id: \ return #_id CASE(NO_SYSCALL); CASE(ENTERING_SYSCALL_PTRACE); CASE(ENTERING_SYSCALL); CASE(PROCESSING_SYSCALL); CASE(EXITING_SYSCALL); #undef CASE default: return "???state"; } } } // namespace rr rr-5.7.0/src/Event.h000066400000000000000000000345141450675474200142140ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EVENT_H_ #define RR_EVENT_H_ #include #include #include #include #include #include "Registers.h" #include "core.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "preload/preload_interface.h" struct syscallbuf_record; namespace rr { /** * During recording, sometimes we need to ensure that an iteration of * RecordSession::record_step schedules the same task as in the previous * iteration. The PREVENT_SWITCH value indicates that this is required. * For example, the futex operation FUTEX_WAKE_OP modifies userspace * memory; those changes are only recorded after the system call completes; * and they must be replayed before we allow a context switch to a woken-up * task (because the kernel guarantees those effects are seen by woken-up * tasks). * Entering a potentially blocking system call must use ALLOW_SWITCH, or * we risk deadlock. 
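* For example, suppose tracee A does a blocking read() from a pipe whose
* data will be written by tracee B: if the read were recorded with
* PREVENT_SWITCH, B could never be scheduled to do its write and the
* recording would hang.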
Most non-blocking system calls could use PREVENT_SWITCH * or ALLOW_SWITCH; for simplicity we use ALLOW_SWITCH to indicate a call could * block and PREVENT_SWITCH otherwise. * Note that even if a system call uses PREVENT_SWITCH, as soon as we've * recorded the completion of the system call, we can switch to another task. */ enum Switchable { PREVENT_SWITCH, ALLOW_SWITCH }; /** * Events serve two purposes: tracking Task state during recording, and * being stored in traces to guide replay. Some events are only used during * recording and are never actually stored in traces (and are thus irrelevant * to replay). */ enum EventType { EV_UNASSIGNED, EV_SENTINEL, // TODO: this is actually a pseudo-pseudosignal: it will never // appear in a trace, but is only used to communicate between // different parts of the recorder code that should be // refactored to not have to do that. EV_NOOP, EV_DESCHED, EV_SECCOMP_TRAP, EV_SYSCALL_INTERRUPTION, // Not stored in trace, but synthesized when we reach the end of the trace. EV_TRACE_TERMINATION, // Events present in traces: // No associated data. EV_EXIT, // Scheduling signal interrupted the trace. EV_SCHED, // A disabled RDTSC or CPUID instruction. EV_INSTRUCTION_TRAP, // Recorded syscallbuf data for one or more buffered syscalls. EV_SYSCALLBUF_FLUSH, EV_SYSCALLBUF_ABORT_COMMIT, // The syscallbuf was reset to the empty state. We record this event // later than it really happens, because during replay we must proceed to // the event *after* a syscallbuf flush and then reset the syscallbuf, // to ensure we don't reset it while preload code is still using the data. EV_SYSCALLBUF_RESET, // Syscall was entered, the syscall instruction was patched, and the // syscall was aborted. Resume execution at the patch. EV_PATCH_SYSCALL, // Map memory pages due to a (future) memory access. This is associated // with a mmap entry for the new pages. EV_GROW_MAP, // Use .signal. EV_SIGNAL, EV_SIGNAL_DELIVERY, EV_SIGNAL_HANDLER, // Use .syscall. EV_SYSCALL, EV_LAST }; /** * Desched events track the fact that a tracee's desched-event * notification fired during a may-block buffered syscall, which rr * interprets as the syscall actually blocking (for a potentially * unbounded amount of time). After the syscall exits, rr advances * the tracee to where the desched is "disarmed" by the tracee. */ struct DeschedEvent { /** Desched of |rec|. */ DeschedEvent(remote_ptr rec) : rec(rec) {} // Record of the syscall that was interrupted by a desched // notification. It's legal to reference this memory /while // the desched is being processed only/, because |t| is in the // middle of a desched, which means it's successfully // allocated (but not yet committed) this syscall record. remote_ptr rec; }; struct PatchSyscallEvent { PatchSyscallEvent() : patch_trapping_instruction(false), patch_after_syscall(false), patch_vsyscall(false) {} // If true, this patch is for a trapping instruction, not a real syscall bool patch_trapping_instruction; // If true, this patch event comes after a syscall (whereas usually they // come before). We assume the trace has put us in the correct place // and don't try to execute any code to reach this event. bool patch_after_syscall; // If true, this patch is for the caller of a vsyscall entry point bool patch_vsyscall; }; /** * Sched events track points at which context switches happen that are not * otherwise associated with an rr event. * Also used to record the point at which a tracee is SIGKILLed, which * may require special handling if in the syscallbuf. 
*/ struct SchedEvent { SchedEvent(remote_code_ptr in_syscallbuf_syscall_hook) : in_syscallbuf_syscall_hook(in_syscallbuf_syscall_hook) {} // If this SchedEvent represents the tracee being SIGKILLed, // and syscall buffering is enabled, this contains the address // of the 'syscall_hook' function, otherwise zero. remote_code_ptr in_syscallbuf_syscall_hook; }; struct SyscallbufFlushEvent { SyscallbufFlushEvent() {} std::vector mprotect_records; }; enum SignalDeterministic { NONDETERMINISTIC_SIG = 0, DETERMINISTIC_SIG = 1 }; enum SignalResolvedDisposition { DISPOSITION_FATAL = 0, DISPOSITION_USER_HANDLER = 1, DISPOSITION_IGNORED = 2, }; struct SignalEvent { /** * Signal |signo| is the signum, and |deterministic| is true * for deterministically-delivered signals (see * record_signal.cc). */ SignalEvent(const siginfo_t& siginfo, SignalDeterministic deterministic, SignalResolvedDisposition disposition) : siginfo(siginfo), deterministic(deterministic), disposition(disposition) {} // Signal info siginfo_t siginfo; // True if this signal will be deterministically raised as the // side effect of retiring an instruction during replay, for // example |load $r 0x0| deterministically raises SIGSEGV. SignalDeterministic deterministic; SignalResolvedDisposition disposition; }; /** * Syscall events track syscalls through entry into the kernel, * processing in the kernel, and exit from the kernel. * * This also models interrupted syscalls. During recording, only * descheduled buffered syscalls /push/ syscall interruptions; all * others are detected at exit time and transformed into syscall * interruptions from the original, normal syscalls. * * Normal system calls (interrupted or not) record two events: ENTERING_SYSCALL * and EXITING_SYSCALL. If the process exits before the syscall exit (because * this is an exit/exit_group syscall or the process gets SIGKILL), there's no * syscall exit event. * * When PTRACE_SYSCALL is used, there will be three events: * ENTERING_SYSCALL_PTRACE to run the process until it gets into the kernel, * then ENTERING_SYSCALL and EXITING_SYSCALL. We need three events to handle * PTRACE_SYSCALL with clone/fork/vfork and execve. The tracee must run to * the ENTERING_SYSCALL_PTRACE state, allow a context switch so the ptracer * can modify tracee registers, then perform ENTERING_SYSCALL (which actually * creates the new task or does the exec), allow a context switch so the * ptracer can modify the new task or post-exec state in a PTRACE_EVENT_EXEC/ * CLONE/FORK/VFORK, then perform EXITING_SYSCALL to get into the correct * post-syscall state. * * When PTRACE_SYSEMU is used, there will only be one event: an * ENTERING_SYSCALL_PTRACE. */ enum SyscallState { // Not present in trace. Just a dummy value. NO_SYSCALL, // Run to the given register state and enter the kernel but don't // perform any system call processing yet. ENTERING_SYSCALL_PTRACE, // Run to the given register state and enter the kernel, if not already // there due to a ENTERING_SYSCALL_PTRACE, and then perform the initial part // of the system call (any work required before issuing a during-system-call // ptrace event). ENTERING_SYSCALL, // Not present in trace. PROCESSING_SYSCALL, // Already in the kernel. Perform the final part of the system call and exit // with the recorded system call result. EXITING_SYSCALL }; struct OpenedFd { std::string path; int fd; dev_t device; ino_t inode; }; struct SyscallEvent { /** Syscall |syscallno| is the syscall number. 
*/ SyscallEvent(int syscallno, SupportedArch arch) : arch_(arch), regs(arch), desched_rec(nullptr), write_offset(-1), state(NO_SYSCALL), number(syscallno), switchable(PREVENT_SWITCH), is_restart(false), failed_during_preparation(false), in_sysemu(false) {} std::string syscall_name() const { return rr::syscall_name(number, arch()); } SupportedArch arch() const { return arch_; } /** Change the architecture for this event. */ void set_arch(SupportedArch a) { arch_ = a; } bool is_exec() const { return is_execve_syscall(number, arch()) || is_execveat_syscall(number, arch()); } SupportedArch arch_; // The original (before scratch is set up) arguments to the // syscall passed by the tracee. These are used to detect // restarted syscalls. Registers regs; // If this is a descheduled buffered syscall, points at the // record for that syscall. remote_ptr desched_rec; // Extra data for specific syscalls. Only used for exit events currently. // -1 to indicate there isn't one int64_t write_offset; std::vector exec_fds_to_close; std::vector opened; std::shared_ptr> socket_addrs; SyscallState state; // Syscall number. int number; // Records the switchable state when this syscall was prepared Switchable switchable; // True when this syscall was restarted after a signal interruption. bool is_restart; // True when this syscall failed during preparation: syscall entry events // that were interrupted by a user seccomp filter forcing SIGSYS or errno, // and clone system calls that failed. These system calls failed no matter // what the syscall-result register says. bool failed_during_preparation; // Syscall is being emulated via PTRACE_SYSEMU. bool in_sysemu; }; struct syscall_interruption_t { syscall_interruption_t(){}; }; static const syscall_interruption_t interrupted; /** * Sum type for all events (well, a C++ approximation thereof). An * Event always has a definted EventType. It can be down-casted to * one of the leaf types above iff the type tag is correct. 
*/ struct Event { Event() : event_type(EV_UNASSIGNED) {} Event(const DeschedEvent& ev) : event_type(EV_DESCHED), desched(ev) {} Event(EventType type, const SignalEvent& ev) : event_type(type), signal(ev) {} Event(const SyscallbufFlushEvent& ev) : event_type(EV_SYSCALLBUF_FLUSH), syscallbuf_flush(ev) {} Event(const SyscallEvent& ev) : event_type(EV_SYSCALL), syscall(ev) {} Event(const syscall_interruption_t&, const SyscallEvent& ev) : event_type(EV_SYSCALL_INTERRUPTION), syscall(ev) {} Event(const Event& o); ~Event(); Event& operator=(const Event& o); DeschedEvent& Desched() { DEBUG_ASSERT(EV_DESCHED == event_type); return desched; } const DeschedEvent& Desched() const { DEBUG_ASSERT(EV_DESCHED == event_type); return desched; } PatchSyscallEvent& PatchSyscall() { DEBUG_ASSERT(EV_PATCH_SYSCALL == event_type); return patch; } const PatchSyscallEvent& PatchSyscall() const { DEBUG_ASSERT(EV_PATCH_SYSCALL == event_type); return patch; } SchedEvent& Sched() { DEBUG_ASSERT(is_sched_event()); return sched_; } const SchedEvent& Sched() const { DEBUG_ASSERT(is_sched_event()); return sched_; } SyscallbufFlushEvent& SyscallbufFlush() { DEBUG_ASSERT(EV_SYSCALLBUF_FLUSH == event_type); return syscallbuf_flush; } const SyscallbufFlushEvent& SyscallbufFlush() const { DEBUG_ASSERT(EV_SYSCALLBUF_FLUSH == event_type); return syscallbuf_flush; } SignalEvent& Signal() { DEBUG_ASSERT(is_signal_event()); return signal; } const SignalEvent& Signal() const { DEBUG_ASSERT(is_signal_event()); return signal; } SyscallEvent& Syscall() { DEBUG_ASSERT(is_syscall_event()); return syscall; } const SyscallEvent& Syscall() const { DEBUG_ASSERT(is_syscall_event()); return syscall; } bool record_regs() const; bool record_extra_regs() const; bool has_ticks_slop() const; /** * Return true if this is one of the indicated type of events. */ bool is_signal_event() const; bool is_sched_event() const { return event_type == EV_SCHED; } bool is_syscall_event() const; /** Return a string describing this. */ std::string str() const; /** * Dynamically change the type of this. Only a small number * of type changes are allowed. */ void transform(EventType new_type); /** Return the current type of this. */ EventType type() const { return event_type; } /** Return a string naming |ev|'s type. 
*/ std::string type_name() const; static Event noop() { return Event(EV_NOOP); } static Event trace_termination() { return Event(EV_TRACE_TERMINATION); } static Event instruction_trap() { return Event(EV_INSTRUCTION_TRAP); } static Event patch_syscall() { auto ev = Event(EV_PATCH_SYSCALL); ev.PatchSyscall().patch_after_syscall = false; ev.PatchSyscall().patch_vsyscall = false; ev.PatchSyscall().patch_trapping_instruction = false; return ev; } static Event sched() { auto ev = Event(EV_SCHED); ev.Sched().in_syscallbuf_syscall_hook = remote_code_ptr(); return ev; } static Event seccomp_trap() { return Event(EV_SECCOMP_TRAP); } static Event syscallbuf_abort_commit() { return Event(EV_SYSCALLBUF_ABORT_COMMIT); } static Event syscallbuf_reset() { return Event(EV_SYSCALLBUF_RESET); } static Event grow_map() { return Event(EV_GROW_MAP); } static Event exit() { return Event(EV_EXIT); } static Event sentinel() { return Event(EV_SENTINEL); } private: Event(EventType type) : event_type(type) {} EventType event_type; union { DeschedEvent desched; PatchSyscallEvent patch; SchedEvent sched_; SignalEvent signal; SyscallEvent syscall; SyscallbufFlushEvent syscallbuf_flush; }; }; inline static std::ostream& operator<<(std::ostream& o, const Event& ev) { return o << ev.str(); } const char* state_name(SyscallState state); } // namespace rr #endif // EVENT_H_ rr-5.7.0/src/ExportImportCheckpoints.cc000066400000000000000000000262331450675474200201370ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "ExportImportCheckpoints.h" #include #include #include #include #include #include #include #include "log.h" #include "main.h" #include "WaitManager.h" using namespace std; /* Clients connect to the checkpoints socket and may send a one-byte command. Currently the only valid command value is 0: create a checkpoint. 
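As implemented in export_checkpoints()/invoke_checkpoint_command() below, a
request then consists of: the client's scheduling priority (a ssize_t), the
number of fds being passed (a size_t), a single SCM_RIGHTS message carrying
those fds (the exit-notification fd, then stdin/stdout/stderr, then any
extras), the argument count (a size_t), and each argument as a size_t length
followed by that many bytes. A hypothetical client-side call:
invoke_checkpoint_command("/tmp/ckpts.sock", { "replay", "-g", "100" }).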
*/ namespace rr { bool parse_export_checkpoints(const string& arg, FrameTime& export_checkpoints_event, int& export_checkpoints_count, string& export_checkpoints_socket) { size_t first_comma = arg.find(','); if (first_comma == string::npos) { fprintf(stderr, "Missing parameter for --export-checkpoints\n"); return false; } size_t second_comma = arg.find(',', first_comma + 1); if (second_comma == string::npos) { fprintf(stderr, "Missing parameter for --export-checkpoints\n"); return false; } char* endptr; string event_str = arg.substr(0, first_comma); export_checkpoints_event = strtoul(event_str.c_str(), &endptr, 0); if (*endptr) { fprintf(stderr, "Invalid event for --export-checkpoints: %s\n", event_str.c_str()); return false; } string num_str = arg.substr(first_comma + 1, second_comma - (first_comma + 1)); export_checkpoints_count = strtoul(num_str.c_str(), &endptr, 0); if (*endptr) { fprintf(stderr, "Invalid count for --export-checkpoints: %s\n", num_str.c_str()); return false; } export_checkpoints_socket = arg.substr(second_comma + 1); return true; } ScopedFd bind_export_checkpoints_socket(int count, const string& socket_file_name) { unlink(socket_file_name.c_str()); ScopedFd sock = ScopedFd(socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)); if (!sock.is_open()) { FATAL() << "Can't create Unix socket " << socket_file_name; } if (socket_file_name.size() + 1 > sizeof(sockaddr_un::sun_path)) { FATAL() << "Socket file name " << socket_file_name << " too long"; } int reuse = 1; int ret = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); if (ret < 0) { FATAL() << "Failed to set SO_REUSEADDR"; } sockaddr_un addr; addr.sun_family = AF_UNIX; strcpy(addr.sun_path, socket_file_name.c_str()); ret = ::bind(sock, (const sockaddr*)&addr, sizeof(addr)); if (ret < 0) { FATAL() << "Can't bind Unix socket " << socket_file_name; } ret = listen(sock, count); if (ret < 0) { FATAL() << "Can't listen on Unix socket " << socket_file_name; } return sock; } static void send_all(ScopedFd& sock, const void* vbuf, size_t count) { const char* buf = static_cast<const char*>(vbuf); while (count > 0) { ssize_t ret = send(sock, buf, count, 0); if (ret <= 0) { FATAL() << "Failed to send complete message"; } count -= ret; buf += ret; } } static void recv_all(ScopedFd& sock, void* vbuf, size_t count) { char* buf = static_cast<char*>(vbuf); while (count > 0) { ssize_t ret = recv(sock, buf, count, 0); if (ret <= 0) { FATAL() << "Failed to recv complete message"; } count -= ret; buf += ret; } } static void setup_child_fds(vector<int> fds, CommandForCheckpoint& command_for_checkpoint) { command_for_checkpoint.exit_notification_fd = ScopedFd(fds[0]); for (int our_fd = 0; our_fd < 3; ++our_fd) { // We deliberately don't set CLOEXEC here since we might want these // to be inherited. int ret = dup2(fds[our_fd + 1], our_fd); if (ret < 0) { FATAL() << "Can't dup over stdin/stdout/stderr"; } close(fds[our_fd + 1]); } for (size_t i = 4; i < fds.size(); ++i) { command_for_checkpoint.fds.push_back(ScopedFd(fds[i])); } } static void set_title(const vector<string>& args) { string line = "rr:"; for (auto& a : args) { line += ' '; line += a; } char* arg0 = saved_argv0(); size_t space = saved_argv0_space() - 1; if (space < 3) { return; } // To simplify things, instead of moving the environment around when the new command is too long, // we just truncate it. 
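// (For example, with space == 8 the line "rr: replay" would be shown as "rr: r...".)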
if (line.size() > space) { line[space - 3] = line[space - 2] = line[space - 1] = '.'; line.resize(space); } memcpy(arg0, line.data(), line.size()); memset(arg0 + line.size(), 0, space - line.size()); } CommandForCheckpoint export_checkpoints(ReplaySession::shr_ptr session, int count, ScopedFd& sock, const std::string&) { if (!session->can_clone()) { FATAL() << "Can't create checkpoints at this time, aborting: " << session->current_frame_time(); } CommandForCheckpoint command_for_checkpoint; vector<pid_t> children; for (int i = 0; i < count; ++i) { ScopedFd client = ScopedFd(accept4(sock, nullptr, nullptr, SOCK_CLOEXEC)); if (!client.is_open()) { FATAL() << "Failed to accept client connection"; } ssize_t priority; recv_all(client, &priority, sizeof(priority)); ssize_t ret = setpriority(PRIO_PROCESS, 0, priority); if (ret < 0) { if (errno == EACCES) { LOG(warn) << "Failed to increase priority"; } else { FATAL() << "Failed setpriority"; } } size_t fds_size; recv_all(client, &fds_size, sizeof(fds_size)); // Do the SCM_RIGHTS dance to receive file descriptors msghdr msg; memset(&msg, 0, sizeof(msg)); char dummy_buf; iovec iov = { &dummy_buf, 1 }; msg.msg_iov = &iov; msg.msg_iovlen = 1; vector<char> cbuf; size_t data_len = sizeof(int)*fds_size; cbuf.resize(CMSG_SPACE(data_len)); msg.msg_control = cbuf.data(); msg.msg_controllen = cbuf.size(); ret = recvmsg(client, &msg, MSG_CMSG_CLOEXEC); if (ret != 1) { FATAL() << "Failed to read fds"; } cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS || cmsg->cmsg_len != CMSG_LEN(data_len)) { FATAL() << "Invalid cmsg metadata"; } vector<int> fds_data; fds_data.resize(fds_size); memcpy(fds_data.data(), CMSG_DATA(cmsg), data_len); size_t arg_count; recv_all(client, &arg_count, sizeof(arg_count)); vector<string> args; for (size_t i = 0; i < arg_count; ++i) { size_t arg_size; recv_all(client, &arg_size, sizeof(arg_size)); vector<char> arg; arg.resize(arg_size); recv_all(client, arg.data(), arg_size); args.push_back(string(arg.data(), arg.size())); } ReplaySession::shr_ptr checkpoint = session->clone(); int parent_to_child_fds[2]; ret = pipe(parent_to_child_fds); if (ret < 0) { FATAL() << "Can't pipe"; } ScopedFd parent_to_child_read(parent_to_child_fds[0]); ScopedFd parent_to_child_write(parent_to_child_fds[1]); checkpoint->prepare_to_detach_tasks(); // We need to create a new control socket for the child; we can't use the shared control socket // safely in multiple processes. 
int sockets[2]; ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sockets); if (ret < 0) { FATAL() << "socketpair failed"; } ScopedFd new_tracee_socket(sockets[0]); ScopedFd new_tracee_socket_receiver(sockets[1]); pid_t child = fork(); if (!child) { set_title(args); session->forget_tasks(); command_for_checkpoint.args = std::move(args); command_for_checkpoint.session = std::move(checkpoint); setup_child_fds(fds_data, command_for_checkpoint); char ch; ret = read(parent_to_child_read, &ch, 1); if (ret != 1) { FATAL() << "Failed to read parent notification"; } command_for_checkpoint.session->reattach_tasks(std::move(new_tracee_socket), std::move(new_tracee_socket_receiver)); return command_for_checkpoint; } children.push_back(child); checkpoint->detach_tasks(child, new_tracee_socket_receiver); ret = write(parent_to_child_write, "x", 1); if (ret != 1) { FATAL() << "Failed to write parent notification"; } for (auto d : fds_data) { close(d); } } // Wait for and reap all children for (size_t i = 0; i < children.size(); ++i) { WaitResult result = WaitManager::wait_exit(WaitOptions(children[i])); if (result.code != WAIT_OK) { FATAL() << "Failed to wait for child " << children[i]; } } return command_for_checkpoint; } void notify_normal_exit(ScopedFd& exit_notification_fd) { ssize_t ret = write(exit_notification_fd, "", 1); if (ret != 1) { FATAL() << "Can't send exit notification"; } } int invoke_checkpoint_command(const string& socket_file_name, vector<string> args, vector<ScopedFd> fds) { ScopedFd sock = ScopedFd(socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)); if (!sock.is_open()) { FATAL() << "Can't create Unix socket " << socket_file_name; } if (socket_file_name.size() + 1 > sizeof(sockaddr_un::sun_path)) { FATAL() << "Socket file name " << socket_file_name << " too long"; } sockaddr_un addr; addr.sun_family = AF_UNIX; strcpy(addr.sun_path, socket_file_name.c_str()); while (true) { ssize_t ret = connect(sock, (const sockaddr*)&addr, sizeof(addr)); if (ret < 0) { // We might try to connect between the socket being bound and listen()ed on if (errno == ENOENT || errno == ECONNREFUSED) { sleep(1); continue; } FATAL() << "Can't connect socket " << socket_file_name; } break; } ssize_t ret = getpriority(PRIO_PROCESS, 0); if (ret < 0) { FATAL() << "Failed getpriority"; } send_all(sock, &ret, sizeof(ret)); size_t total_fds = 4 + fds.size(); send_all(sock, &total_fds, sizeof(total_fds)); int exit_notification_pipe_fds[2]; ret = pipe(exit_notification_pipe_fds); if (ret < 0) { FATAL() << "Failed pipe"; } // Do the SCM_RIGHTS dance to send file descriptors. msghdr msg; memset(&msg, 0, sizeof(msg)); char ch = 'x'; iovec iov = { &ch, 1 }; msg.msg_iov = &iov; msg.msg_iovlen = 1; vector<char> cbuf; size_t data_len = sizeof(int)*total_fds; cbuf.resize(CMSG_SPACE(data_len)); msg.msg_control = cbuf.data(); msg.msg_controllen = cbuf.size(); cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(data_len); char* cmsg_data = (char*)CMSG_DATA(cmsg); vector<int> int_fds; int_fds.push_back(exit_notification_pipe_fds[1]); int_fds.push_back(0); int_fds.push_back(1); int_fds.push_back(2); for (auto& fd : fds) { int_fds.push_back(fd); } memcpy(cmsg_data, int_fds.data(), sizeof(int)*int_fds.size()); ret = sendmsg(sock, &msg, 0); if (ret != 1) { FATAL() << "Can't send file descriptors"; } close(exit_notification_pipe_fds[1]); // Close stdin but keep stdout/stderr open in case we need to print something ourselves. 
close(0); size_t arg_count = args.size(); send_all(sock, &arg_count, sizeof(arg_count)); for (auto& arg : args) { size_t arg_size = arg.size(); send_all(sock, &arg_size, sizeof(arg_size)); send_all(sock, arg.data(), arg_size); } ret = read(exit_notification_pipe_fds[0], &ch, 1); if (ret < 0) { FATAL() << "Can't read from notification pipe"; } if (ret == 0) { // abnormal termination return 1; } return 0; } } // namespace rr rr-5.7.0/src/ExportImportCheckpoints.h000066400000000000000000000035671450675474200200040ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EXPORT_IMPORT_CHECKPOINTS_H_ #define RR_EXPORT_IMPORT_CHECKPOINTS_H_ #include "ReplaySession.h" #include <string> #include <vector> namespace rr { bool parse_export_checkpoints(const std::string& arg, FrameTime& export_checkpoints_event, int& export_checkpoints_count, std::string& export_checkpoints_socket); /* Bind the socket so clients can try to connect to it and block. */ ScopedFd bind_export_checkpoints_socket(int count, const std::string& socket_file_name); /* A command to run on the checkpoint */ struct CommandForCheckpoint { std::vector<std::string> args; std::vector<ScopedFd> fds; ReplaySession::shr_ptr session; ScopedFd exit_notification_fd; }; /* Export checkpoints from the given session. This function will return `count` + 1 times; the first `count` times in a forked child with a valid CommandForCheckpoint with a nonnull `session`; the last time with a null `session` when all forked children have exited and been reaped. For the child returns, stdin/stdout/stderr will have been rebound to fds passed in over the socket. */ CommandForCheckpoint export_checkpoints(ReplaySession::shr_ptr session, int count, ScopedFd& sock, const std::string& socket_file_name); /* After performing the CommandForCheckpoint, notify that we have exited normally. */ void notify_normal_exit(ScopedFd& exit_notification_fd); /* Invoke a command on a checkpoint. stdin/stdout/stderr and the args and fds are passed to the exporter process, then we wait for it to complete and exit, and return an appropriate exit code. */ int invoke_checkpoint_command(const std::string& socket_file_name, std::vector<std::string> args, std::vector<ScopedFd> fds = std::vector<ScopedFd>()); } // namespace rr #endif /* RR_EXPORT_IMPORT_CHECKPOINTS_H_ */ rr-5.7.0/src/ExtraRegisters.cc000066400000000000000000000702361450675474200162450ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "ExtraRegisters.h" #include <string.h> #include "ReplayTask.h" #include "core.h" #include "log.h" #include "util.h" using namespace std; namespace rr { // This is the byte offset at which the ST0-7 register data begins // with an xsave (or fxsave) block. static const int st_regs_offset = 32; // NB: each STx register holds 10 bytes of actual data, but each // occupies 16 bytes of space within (f)xsave, presumably for // alignment purposes. static const int st_reg_space = 16; // Byte offset at which the XMM0-15 register data begins with (f)xsave. static const int xmm_regs_offset = 160; static const int xmm_reg_space = 16; static const uint8_t fxsave_387_ctrl_offsets[] = { // The Intel documentation says that the following layout is only valid in // 32-bit mode, or when fxsave is executed in 64-bit mode without an // appropriate REX prefix. The kernel seems to only use fxsave with the // REX prefix, so one would think these offsets would be different. But // GDB seems happy to use these offsets, so that's what we use too. 
0, // DREG_64_FCTRL 2, // DREG_64_FSTAT 4, // DREG_64_FTAG 12, // DREG_64_FISEG 8, // DREG_64_FIOFF 20, // DREG_64_FOSEG 16, // DREG_64_FOOFF 6, // DREG_64_FOP }; static const int fip_offset = 8; static const int fop_offset = 6; static const int fdp_offset = 16; static const int mxcsr_offset = 24; struct RegData { int offset; int size; int xsave_feature_bit; RegData(int offset = -1, int size = 0) : offset(offset), size(size), xsave_feature_bit(-1) {} }; static bool reg_in_range(GdbRegister regno, GdbRegister low, GdbRegister high, int offset_base, int offset_stride, int size, RegData* out) { if (regno < low || regno > high) { return false; } out->offset = offset_base + offset_stride * (regno - low); out->size = size; return true; } static const int AVX_FEATURE_BIT = 2; static const int PKRU_FEATURE_BIT = 9; static const uint64_t PKRU_FEATURE_MASK = 1 << PKRU_FEATURE_BIT; static const size_t xsave_header_offset = 512; static const size_t xsave_header_size = 64; static const size_t xsave_header_end = xsave_header_offset + xsave_header_size; // This is always at 576 since AVX is always the first optional feature, // if present. static const size_t AVX_xsave_offset = 576; // Return the size and data location of register |regno|. // If we can't read the register, returns -1 in 'offset'. static RegData xsave_register_data(SupportedArch arch, GdbRegister regno) { // Check regno is in range, and if it's 32-bit then convert it to the // equivalent 64-bit register. switch (arch) { case x86: // Convert regno to the equivalent 64-bit version since the XSAVE layout // is compatible if (regno >= DREG_XMM0 && regno <= DREG_XMM7) { regno = (GdbRegister)(regno - DREG_XMM0 + DREG_64_XMM0); break; } if (regno >= DREG_YMM0H && regno <= DREG_YMM7H) { regno = (GdbRegister)(regno - DREG_YMM0H + DREG_64_YMM0H); break; } if (regno == DREG_MXCSR) { regno = DREG_64_MXCSR; } else if (regno == DREG_PKRU) { regno = DREG_64_PKRU; } else if (regno < DREG_FIRST_FXSAVE_REG || regno > DREG_LAST_FXSAVE_REG) { return RegData(); } else { regno = (GdbRegister)(regno - DREG_FIRST_FXSAVE_REG + DREG_64_FIRST_FXSAVE_REG); } break; case x86_64: break; default: DEBUG_ASSERT(0 && "Unknown arch"); return RegData(); } RegData result; if (reg_in_range(regno, DREG_64_ST0, DREG_64_ST7, st_regs_offset, st_reg_space, 10, &result)) { return result; } if (reg_in_range(regno, DREG_64_XMM0, DREG_64_XMM15, xmm_regs_offset, xmm_reg_space, 16, &result)) { return result; } if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, AVX_xsave_offset, 16, 16, &result)) { result.xsave_feature_bit = AVX_FEATURE_BIT; return result; } if (regno == DREG_64_PKRU) { const XSaveLayout& layout = xsave_native_layout(); if (PKRU_FEATURE_BIT > layout.feature_layouts.size()) { return RegData(); } const XSaveFeatureLayout& fl = layout.feature_layouts[PKRU_FEATURE_BIT]; result.offset = fl.offset; // NB: the PKRU *region* may be 8 bytes to maintain alignment but the // PKRU *register* is only the first 4 bytes. result.size = 4; result.xsave_feature_bit = PKRU_FEATURE_BIT; return result; } if (regno < DREG_64_FIRST_FXSAVE_REG || regno > DREG_64_LAST_FXSAVE_REG) { return RegData(); } if (regno == DREG_64_MXCSR) { return RegData(24, 4); } DEBUG_ASSERT(regno >= DREG_64_FCTRL && regno <= DREG_64_FOP); // NB: most of these registers only occupy 2 bytes of space in // the (f)xsave region, but gdb's default x86 target // config expects us to send back 4 bytes of data for // each. 
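// For example, regno == DREG_64_FSTAT maps to RegData(2, 4) below: gdb receives a
// 4-byte value even though the FSW field only occupies bytes 2-3 of the fxsave area.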
return RegData(fxsave_387_ctrl_offsets[regno - DREG_64_FCTRL], 4); } static const uint64_t* xsave_features(const vector<uint8_t>& data) { // If this is just FXSAVE(64) data then we have no XSAVE header and no // XSAVE(64) features enabled. return data.size() < xsave_header_offset + xsave_header_size ? nullptr : reinterpret_cast<const uint64_t*>(data.data() + xsave_header_offset); } static uint64_t* xsave_features(vector<uint8_t>& data) { // If this is just FXSAVE(64) data then we have no XSAVE header and no // XSAVE(64) features enabled. return data.size() < xsave_header_offset + xsave_header_size ? nullptr : reinterpret_cast<uint64_t*>(data.data() + xsave_header_offset); } size_t ExtraRegisters::read_register(uint8_t* buf, GdbRegister regno, bool* defined) const { if (format_ == NT_FPR) { if (arch() != aarch64) { *defined = false; return 0; } RegData reg_data; if (DREG_V0 <= regno && regno <= DREG_V31) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, vregs[0]) + ((regno - DREG_V0) * 16), 16); } else if (regno == DREG_FPSR) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, fpsr), sizeof(uint32_t)); } else if (regno == DREG_FPCR) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, fpcr), sizeof(uint32_t)); } else { *defined = false; return 0; } DEBUG_ASSERT(size_t(reg_data.offset + reg_data.size) <= data_.size()); *defined = true; memcpy(buf, data_.data() + reg_data.offset, reg_data.size); return reg_data.size; } if (format_ != XSAVE) { *defined = false; return 0; } auto reg_data = xsave_register_data(arch(), regno); if (reg_data.offset < 0 || empty()) { *defined = false; return reg_data.size; } DEBUG_ASSERT(reg_data.size > 0); *defined = true; // Apparently before any AVX registers are used, the feature bit is not set // in the XSAVE data, so we'll just return 0 for them here. 
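// (For instance, reading DREG_64_YMM0H from a tracee that has never executed an
// AVX instruction returns zeroes here rather than whatever stale bytes happen to
// be in the buffer.)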
const uint64_t* xsave_features_ = xsave_features(data_); if (reg_data.xsave_feature_bit >= 0 && (!xsave_features_ || !(*xsave_features_ & (1 << reg_data.xsave_feature_bit)))) { memset(buf, 0, reg_data.size); } else { DEBUG_ASSERT(size_t(reg_data.offset + reg_data.size) <= data_.size()); memcpy(buf, data_.data() + reg_data.offset, reg_data.size); } return reg_data.size; } bool ExtraRegisters::write_register(GdbRegister regno, const void* value, size_t value_size) { if (format_ == NT_FPR) { if (arch() != aarch64) { return false; } RegData reg_data; if (DREG_V0 <= regno && regno <= DREG_V31) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, vregs[0]) + ((regno - DREG_V0) * 16), 16); } else if (regno == DREG_FPSR) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, fpsr), sizeof(uint32_t)); } else if (regno == DREG_FPCR) { reg_data = RegData(offsetof(ARM64Arch::user_fpsimd_state, fpcr), sizeof(uint32_t)); } else { return false; } DEBUG_ASSERT(reg_data.size > 0); if ((size_t)reg_data.size != value_size) { LOG(warn) << "Register " << regno << " has mismatched sizes (" << reg_data.size << " vs " << value_size << ")"; return false; } DEBUG_ASSERT(size_t(reg_data.offset + reg_data.size) <= data_.size()); memcpy(data_.data() + reg_data.offset, value, value_size); return true; } if (format_ != XSAVE) { return false; } auto reg_data = xsave_register_data(arch(), regno); if (reg_data.offset < 0 || empty()) { return false; } DEBUG_ASSERT(reg_data.size > 0); if ((size_t)reg_data.size != value_size) { LOG(warn) << "Register " << regno << " has mismatched sizes (" << reg_data.size << " vs " << value_size << ")"; return false; } if (reg_data.xsave_feature_bit >= 0) { uint64_t* xsave_features_ = xsave_features(data_); if (!xsave_features_) { return false; } *xsave_features_ |= (1 << reg_data.xsave_feature_bit); } memcpy(data_.data() + reg_data.offset, value, value_size); return true; } static const int xinuse_offset = 512; uint64_t ExtraRegisters::read_xinuse(bool* defined) const { uint64_t ret; if (format_ != XSAVE || data_.size() < 512 + sizeof(ret)) { *defined = false; return 0; } *defined = true; memcpy(&ret, data_.data() + xinuse_offset, sizeof(ret)); return ret; } uint64_t ExtraRegisters::read_fip(bool* defined) const { if (format_ != XSAVE) { *defined = false; return 0; } *defined = true; uint64_t ret; memcpy(&ret, data_.data() + fip_offset, sizeof(ret)); return ret; } uint16_t ExtraRegisters::read_fop(bool* defined) const { if (format_ != XSAVE) { *defined = false; return 0; } *defined = true; uint16_t ret; memcpy(&ret, data_.data() + fop_offset, sizeof(ret)); return ret; } uint32_t ExtraRegisters::read_mxcsr(bool* defined) const { if (format_ != XSAVE) { *defined = false; return 0; } *defined = true; uint32_t ret; memcpy(&ret, data_.data() + mxcsr_offset, sizeof(ret)); return ret; } bool ExtraRegisters::clear_fip_fdp() { if (format_ != XSAVE) { return false; } bool ret = false; uint64_t v; memcpy(&v, data_.data() + fip_offset, sizeof(v)); if (v != 0) { ret = true; memset(data_.data() + fip_offset, 0, 8); } memcpy(&v, data_.data() + fdp_offset, sizeof(v)); if (v != 0) { ret = true; memset(data_.data() + fdp_offset, 0, 8); } return ret; } void ExtraRegisters::validate(Task* t) { if (format_ != XSAVE) { return; } ASSERT(t, data_.size() >= 512); uint32_t offset = 512; if (data_.size() > offset) { ASSERT(t, data_.size() >= offset + 64); offset += 64; const uint64_t* features = xsave_features(data_); if (features && (*features & (1 << AVX_FEATURE_BIT))) { ASSERT(t, data_.size() >= offset + 256); } } } static size_t get_full_value(const ExtraRegisters& r, 
GdbRegister low, GdbRegister hi, uint8_t buf[128]) { bool defined = false; size_t len = r.read_register(buf, low, &defined); DEBUG_ASSERT(defined && len <= 64); if (hi != GdbRegister(0)) { size_t len2 = r.read_register(buf + len, hi, &defined); if (defined) { DEBUG_ASSERT(len == len2); len += len2; } } return len; } static string reg_to_string(const ExtraRegisters& r, GdbRegister low, GdbRegister hi) { uint8_t buf[128]; size_t len = get_full_value(r, low, hi, buf); bool printed_digit = false; char out_buf[257]; char* p = out_buf; for (int i = len - 1; i >= 0; --i) { if (!printed_digit && !buf[i] && i > 0) { continue; } p += sprintf(p, printed_digit ? "%02x" : "%x", buf[i]); printed_digit = true; } return out_buf; } static void print_reg(const ExtraRegisters& r, GdbRegister low, GdbRegister hi, const char* name, FILE* f) { string out = reg_to_string(r, low, hi); fprintf(f, "%s:0x%s", name, out.c_str()); } static void print_regs(const ExtraRegisters& r, GdbRegister low, GdbRegister hi, int num_regs, const char* name_base, FILE* f) { for (int i = 0; i < num_regs; ++i) { char buf[80]; sprintf(buf, "%s%d", name_base, i); print_reg(r, (GdbRegister)(low + i), hi == GdbRegister(0) ? hi : (GdbRegister)(hi + i), buf, f); if (i < num_regs - 1) { fputc(' ', f); } } } void ExtraRegisters::print_register_file_compact(FILE* f) const { switch (arch_) { case x86: print_regs(*this, DREG_ST0, GdbRegister(0), 8, "st", f); fputc(' ', f); print_regs(*this, DREG_XMM0, DREG_YMM0H, 8, "ymm", f); break; case x86_64: print_regs(*this, DREG_64_ST0, GdbRegister(0), 8, "st", f); fputc(' ', f); print_regs(*this, DREG_64_XMM0, DREG_64_YMM0H, 16, "ymm", f); break; case aarch64: DEBUG_ASSERT(format_ == NT_FPR); print_regs(*this, DREG_V0, GdbRegister(0), 32, "v", f); fputc(' ', f); print_reg(*this, DREG_FPSR, GdbRegister(0), "fpsr", f); fputc(' ', f); print_reg(*this, DREG_FPCR, GdbRegister(0), "fpcr", f); break; default: DEBUG_ASSERT(0 && "Unknown arch"); break; } } static X86Arch::user_fpregs_struct convert_fxsave_to_x86_fpregs( const X86Arch::user_fpxregs_struct& buf) { X86Arch::user_fpregs_struct result; for (int i = 0; i < 8; ++i) { memcpy(reinterpret_cast(result.st_space) + i * 10, &buf.st_space[i * 4], 10); } result.cwd = buf.cwd | 0xffff0000; result.swd = buf.swd | 0xffff0000; // XXX Computing the correct twd is a pain. It probably doesn't matter to us // in practice. result.twd = 0; result.fip = buf.fip; result.fcs = buf.fcs; result.foo = buf.foo; result.fos = buf.fos; return result; } static void convert_x86_fpregs_to_fxsave(const X86Arch::user_fpregs_struct& buf, X86Arch::user_fpxregs_struct* result) { for (int i = 0; i < 8; ++i) { memcpy(&result->st_space[i * 4], reinterpret_cast(buf.st_space) + i * 10, 10); } result->cwd = buf.cwd; result->swd = buf.swd; // XXX Computing the correct twd is a pain. It probably doesn't matter to us // in practice. 
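  // Layout note for the two converters here (an explanatory sketch, not
  // normative): user_fpregs_struct packs each 80-bit x87 register into 10
  // contiguous bytes of st_space, while the FXSAVE image pads each register
  // to a 16-byte slot, hence the `i * 10` byte indexing on one side versus
  // `st_space[i * 4]` (4 x 32-bit words = 16 bytes) on the other. A round
  // trip through both converters preserves the low 10 bytes of each slot;
  // twd is the one field that is not faithfully reconstructed in either
  // direction.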
  result->fip = buf.fip;
  result->fcs = buf.fcs;
  result->foo = buf.foo;
  result->fos = buf.fos;
}

template <typename T> static vector<uint8_t> to_vector(const T& v) {
  vector<uint8_t> result;
  result.resize(sizeof(T));
  memcpy(result.data(), &v, sizeof(T));
  return result;
}

static uint64_t features_used(const uint8_t* data) {
  uint64_t features;
  memcpy(&features, data + xsave_header_offset, sizeof(features));
  return features;
}

template <typename Arch>
bool memcpy_fpr_regs_arch(std::vector<uint8_t>& dest, const uint8_t* src,
                          size_t data_size) {
  if (data_size != sizeof(typename Arch::user_fpregs_struct)) {
    LOG(error) << "Invalid FPR data length: " << data_size
               << " for architecture " << arch_name(Arch::arch())
               << ", expected " << sizeof(typename Arch::user_fpregs_struct);
    return false;
  }
  dest.resize(sizeof(typename Arch::user_fpregs_struct));
  memcpy(dest.data(), src, sizeof(typename Arch::user_fpregs_struct));
  return true;
}

bool memcpy_fpr_regs_arch(SupportedArch arch, std::vector<uint8_t>& dest,
                          const uint8_t* src, size_t data_size) {
  RR_ARCH_FUNCTION(memcpy_fpr_regs_arch, arch, dest, src, data_size)
}

bool ExtraRegisters::set_to_raw_data(SupportedArch a, Format format,
                                     const uint8_t* data, size_t data_size,
                                     const XSaveLayout& layout) {
  arch_ = a;
  format_ = NONE;
  if (format == NONE) {
    return true;
  } else if (format == NT_FPR) {
    if (!memcpy_fpr_regs_arch(a, data_, data, data_size)) {
      return false;
    }
    format_ = NT_FPR;
    return true;
  }
  if (format != XSAVE) {
    LOG(error) << "Unknown ExtraRegisters format: " << format;
    return false;
  }
  format_ = XSAVE;

  // Now we have to convert from the input XSAVE format to our
  // native XSAVE format. Be careful to handle possibly-corrupt input data.

  const XSaveLayout& native_layout = xsave_native_layout();
  if (data_size != layout.full_size) {
    LOG(error) << "Invalid XSAVE data length: " << data_size << ", expected "
               << layout.full_size;
    return false;
  }
  data_.resize(native_layout.full_size);
  DEBUG_ASSERT(data_.size() >= xsave_header_offset);
  if (layout.full_size < xsave_header_offset) {
    LOG(error) << "Invalid XSAVE layout size: " << layout.full_size;
    return false;
  }
  memcpy(data_.data(), data, xsave_header_offset);
  memset(data_.data() + xsave_header_offset, 0,
         data_.size() - xsave_header_offset);

  // Check for unsupported features being used
  if (layout.full_size >= xsave_header_end) {
    uint64_t features = features_used(data);
    /* Mask off the PKRU bit unconditionally here.
     * We want traces that are recorded on machines with PKRU but
     * that don't actually use PKRU to be replayable on machines
     * without PKRU. Linux, however, sets the PKRU register to
     * 0x55555554 (only the default key is allowed to access memory),
     * while the default hardware value is 0, so in some sense
     * PKRU is always in use.
     *
     * There are three classes of side effects of the pkey feature.
     * 1. The direct effects of syscalls such as pkey_alloc/pkey_mprotect
     *    on registers.
     * 2. Traps generated by the CPU when the protection keys are violated.
     * 3. The RDPKRU instruction writing to EAX.
     *
     * The first two are replayed exactly by rr. The latter will trigger
     * SIGILL on any machine without PKRU, which is no different from
     * any other new CPU instruction that doesn't have its own XSAVE
     * feature bit. So ignore the PKRU bit here and leave users on their
     * own with respect to RDPKRU.
*/ features &= ~PKRU_FEATURE_MASK; if (features & ~native_layout.supported_feature_bits) { LOG(error) << "Unsupported CPU features found: got " << HEX(features) << " (" << xsave_feature_string(features) << "), supported: " << HEX(native_layout.supported_feature_bits) << " (" << xsave_feature_string(native_layout.supported_feature_bits) << "); Consider using `rr cpufeatures` and " << "`rr record --disable-cpuid-features-(ext)`"; return false; } } if (native_layout.full_size < xsave_header_end) { // No XSAVE supported here, we're done! return true; } if (layout.full_size < xsave_header_end) { // Degenerate XSAVE format without an actual XSAVE header. Assume x87+XMM // are in use. uint64_t assume_features_used = 0x3; memcpy(data_.data() + xsave_header_offset, &assume_features_used, sizeof(assume_features_used)); return true; } uint64_t features = features_used(data); // OK, now both our native layout and the input layout are using the full // XSAVE header. Copy each optional and present area into the right place // in our struct. for (size_t i = 2; i < 64; ++i) { if (features & (uint64_t(1) << i)) { if (i >= layout.feature_layouts.size()) { LOG(error) << "Invalid feature " << i << " beyond max layout " << layout.feature_layouts.size(); return false; } const XSaveFeatureLayout& feature = layout.feature_layouts[i]; if (uint64_t(feature.offset) + feature.size > layout.full_size) { LOG(error) << "Invalid feature region: " << feature.offset << "+" << feature.size << " > " << layout.full_size; return false; } if (i >= native_layout.feature_layouts.size()) { if (i == PKRU_FEATURE_BIT) { // The native arch doesn't support PKRU. // This must be during replay, and as the comments above explain, // it's OK to not set PKRU during replay on a pre-PKRU CPU, so // we can just ignore this. features &= ~PKRU_FEATURE_MASK; continue; } else { LOG(error) << "Invalid feature " << i << " beyond max layout " << layout.feature_layouts.size(); return false; } } const XSaveFeatureLayout& native_feature = native_layout.feature_layouts[i]; if (native_feature.size == 0 && i == PKRU_FEATURE_BIT) { // See the above comment about PKRU. features &= ~PKRU_FEATURE_MASK; continue; } if (feature.size != native_feature.size) { LOG(error) << "Feature " << i << " has wrong size " << feature.size << ", expected " << native_feature.size; return false; } // The CPU should guarantee these DEBUG_ASSERT(native_feature.offset > 0); DEBUG_ASSERT(native_feature.offset + native_feature.size <= native_layout.full_size); memcpy(data_.data() + native_feature.offset, data + feature.offset, feature.size); } } // Copy the header. Make sure to use our updated `features`. 
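  // For reference (illustrative, based on the offsets used in this file):
  // the XSAVE header starts at xsave_header_offset (512 on x86) and its
  // first 8 bytes are the XSTATE_BV feature bitmap recomputed above, so a
  // hypothetical reader of the buffer written below would look like:
  //
  //   uint64_t xstate_bv;
  //   memcpy(&xstate_bv, data_.data() + xsave_header_offset,
  //          sizeof(xstate_bv));
  //   bool avx_in_use = xstate_bv & (uint64_t(1) << AVX_FEATURE_BIT);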
  memcpy(data_.data() + xsave_header_offset, &features, sizeof(features));
  memcpy(data_.data() + xsave_header_offset + sizeof(features),
         data + xsave_header_offset + sizeof(features),
         xsave_header_size - sizeof(features));
  return true;
}

vector<uint8_t> ExtraRegisters::get_user_fpregs_struct(
    SupportedArch arch) const {
  switch (arch) {
    case x86:
      DEBUG_ASSERT(format_ == XSAVE);
      DEBUG_ASSERT(data_.size() >= sizeof(X86Arch::user_fpxregs_struct));
      return to_vector(convert_fxsave_to_x86_fpregs(
          *reinterpret_cast<const X86Arch::user_fpxregs_struct*>(
              data_.data())));
    case x86_64:
      DEBUG_ASSERT(format_ == XSAVE);
      DEBUG_ASSERT(data_.size() >= sizeof(X64Arch::user_fpregs_struct));
      return to_vector(
          *reinterpret_cast<const X64Arch::user_fpregs_struct*>(data_.data()));
    case aarch64:
      DEBUG_ASSERT(format_ == NT_FPR);
      DEBUG_ASSERT(data_.size() == sizeof(ARM64Arch::user_fpregs_struct));
      return to_vector(*reinterpret_cast<const ARM64Arch::user_fpregs_struct*>(
          data_.data()));
    default:
      DEBUG_ASSERT(0 && "Unknown arch");
      return vector<uint8_t>();
  }
}

void ExtraRegisters::set_user_fpregs_struct(Task* t, SupportedArch arch,
                                            void* data, size_t size) {
  switch (arch) {
    case x86:
      DEBUG_ASSERT(format_ == XSAVE);
      ASSERT(t, size >= sizeof(X86Arch::user_fpregs_struct));
      ASSERT(t, data_.size() >= sizeof(X86Arch::user_fpxregs_struct));
      convert_x86_fpregs_to_fxsave(
          *static_cast<X86Arch::user_fpregs_struct*>(data),
          reinterpret_cast<X86Arch::user_fpxregs_struct*>(data_.data()));
      return;
    case x86_64:
      DEBUG_ASSERT(format_ == XSAVE);
      ASSERT(t, data_.size() >= sizeof(X64Arch::user_fpregs_struct));
      ASSERT(t, size >= sizeof(X64Arch::user_fpregs_struct));
      memcpy(data_.data(), data, sizeof(X64Arch::user_fpregs_struct));
      return;
    case aarch64:
      DEBUG_ASSERT(format_ == NT_FPR);
      ASSERT(t, size >= sizeof(ARM64Arch::user_fpregs_struct));
      ASSERT(t, data_.size() >= sizeof(ARM64Arch::user_fpregs_struct));
      memcpy(data_.data(), data, sizeof(ARM64Arch::user_fpregs_struct));
      return;
    default:
      DEBUG_ASSERT(0 && "Unknown arch");
  }
}

X86Arch::user_fpxregs_struct ExtraRegisters::get_user_fpxregs_struct() const {
  DEBUG_ASSERT(format_ == XSAVE);
  DEBUG_ASSERT(arch_ == x86);
  DEBUG_ASSERT(data_.size() >= sizeof(X86Arch::user_fpxregs_struct));
  return *reinterpret_cast<const X86Arch::user_fpxregs_struct*>(data_.data());
}

void ExtraRegisters::set_user_fpxregs_struct(
    Task* t, const X86Arch::user_fpxregs_struct& regs) {
  ASSERT(t, format_ == XSAVE);
  ASSERT(t, arch_ == x86);
  ASSERT(t, data_.size() >= sizeof(X86Arch::user_fpxregs_struct));
  memcpy(data_.data(), &regs, sizeof(regs));
}

static void set_word(SupportedArch arch, vector<uint8_t>& v, GdbRegister r,
                     int word) {
  RegData d = xsave_register_data(arch, r);
  DEBUG_ASSERT(d.size == 4);
  DEBUG_ASSERT(d.offset + d.size <= (int)v.size());
  DEBUG_ASSERT(-1 == d.xsave_feature_bit);
  *reinterpret_cast<int*>(v.data() + d.offset) = word;
}

void ExtraRegisters::reset() {
  memset(data_.data(), 0, data_.size());
  if (is_x86ish(arch())) {
    DEBUG_ASSERT(format_ == XSAVE);
    if (arch() == x86_64) {
      set_word(arch(), data_, DREG_64_MXCSR, 0x1f80);
      set_word(arch(), data_, DREG_64_FCTRL, 0x37f);
    } else {
      set_word(arch(), data_, DREG_MXCSR, 0x1f80);
      set_word(arch(), data_, DREG_FCTRL, 0x37f);
    }
    uint64_t xinuse;
    if (data_.size() >= xinuse_offset + sizeof(xinuse)) {
      memcpy(&xinuse, data_.data() + xinuse_offset, sizeof(xinuse));
      /* We have observed (Skylake, Linux 4.10) the system setting XINUSE's 0 bit
       * to indicate x87-in-use, at times unrelated to x87 actually being used.
       * Work around this by setting the bit unconditionally after exec. */
      xinuse |= 1;
      /* If the system supports the PKRU feature, the PKRU feature bit must be
       * set in order to get the kernel to properly update the PKRU register
       * value.
If this is not set, it has been observed that the PKRU register * may occasionally contain "stale" values, particularly after involuntary * context switches. * Avoid this issue by setting the bit if the feature is supported by the * CPU. */ if (xcr0() & PKRU_FEATURE_MASK) { RegData d = xsave_register_data(arch(), arch() == x86_64 ? DREG_64_PKRU : DREG_PKRU); DEBUG_ASSERT(d.xsave_feature_bit == PKRU_FEATURE_BIT); DEBUG_ASSERT(d.offset + d.size <= (int)data_.size()); *reinterpret_cast(data_.data() + d.offset) = 0x55555554; xinuse |= PKRU_FEATURE_MASK; } memcpy(data_.data() + xinuse_offset, &xinuse, sizeof(xinuse)); } } else { DEBUG_ASSERT(format_ == NT_FPR); DEBUG_ASSERT(arch() == aarch64 && "Ensure that nothing is required here for your architecture."); } } static void compare_regs(const ExtraRegisters& reg1, const ExtraRegisters& reg2, GdbRegister low, GdbRegister hi, int num_regs, const char* name_base, Registers::Comparison& result) { for (int i = 0; i < num_regs; ++i) { GdbRegister this_low = (GdbRegister)(low + i); GdbRegister this_hi = hi == GdbRegister(0) ? hi : (GdbRegister)(hi + i); uint8_t buf1[128]; size_t len1 = get_full_value(reg1, this_low, this_hi, buf1); uint8_t buf2[128]; size_t len2 = get_full_value(reg2, this_low, this_hi, buf2); DEBUG_ASSERT(len1 == len2); if (!memcmp(buf1, buf2, len1)) { continue; } ++result.mismatch_count; if (result.store_mismatches) { char regname[80]; sprintf(regname, "%s%d", name_base, i); result.mismatches.push_back({regname, reg_to_string(reg1, this_low, this_hi), reg_to_string(reg2, this_low, this_hi)}); } } } void ExtraRegisters::compare_internal(const ExtraRegisters& reg2, Registers::Comparison& result) const { if (arch() != reg2.arch()) { FATAL() << "Can't compare register files with different archs"; } if (format() == NONE || reg2.format() == NONE) { // Not enough data to check anything return; } if (format() != reg2.format()) { FATAL() << "Can't compare register files with different formats"; } switch (arch()) { case x86: compare_regs(*this, reg2, DREG_ST0, GdbRegister(0), 8, "st", result); compare_regs(*this, reg2, DREG_XMM0, DREG_YMM0H, 8, "ymm", result); break; case x86_64: compare_regs(*this, reg2, DREG_64_ST0, GdbRegister(0), 8, "st", result); compare_regs(*this, reg2, DREG_64_XMM0, DREG_64_YMM0H, 8, "ymm", result); break; case aarch64: DEBUG_ASSERT(format_ == NT_FPR); compare_regs(*this, reg2, DREG_V0, GdbRegister(0), 32, "v", result); break; default: DEBUG_ASSERT(0 && "Unknown arch"); break; } } } // namespace rr rr-5.7.0/src/ExtraRegisters.h000066400000000000000000000130601450675474200160770ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EXTRA_REGISTERS_H_ #define RR_EXTRA_REGISTERS_H_ #include #include #include #include "GdbRegister.h" #include "Registers.h" #include "kernel_abi.h" namespace rr { class ReplayTask; struct XSaveLayout; /** * An ExtraRegisters object contains values for all user-space-visible * registers other than those in Registers. * * Task is responsible for creating meaningful values of this class. * * The only reason this class has an arch() is to enable us to * interpret GdbRegister. */ class ExtraRegisters { public: // Create empty (uninitialized/unknown registers) value ExtraRegisters(SupportedArch arch = SupportedArch(-1)) : format_(NONE), arch_(arch) {} enum Format { NONE, /** * The XSAVE format is x86(_64) only. * On a x86 64-bit kernel, these structures are initialized by an XSAVE64 or * FXSAVE64. 
* On a x86 32-bit kernel, they are initialized by an XSAVE or FXSAVE. * * The layouts are basically the same in the first 512 bytes --- an * FXSAVE(64) area. The differences are: * -- On a 64-bit kernel, registers XMM8-XMM15 are saved, but on a 32-bit * kernel they are not (that space is reserved). * -- On a 64-bit kernel, bytes 8-15 store a 64-bit "FPU IP" address, * but on a 32-bit kernel they store "FPU IP/CS". Likewise, * bytes 16-23 store "FPU DP" or "FPU DP/DS". * We basically ignore these differences. If gdb requests 32-bit-specific * registers, we return them, assuming that the data there is valid. * * XSAVE/XSAVE64 have extra information after the first 512 bytes, which we * currently save and restore but do not otherwise use. If the data record * has more than 512 bytes then it's an XSAVE(64) area, otherwise it's just * the FXSAVE(64) area. * * The data always uses our CPU's native XSAVE layout. When reading a trace, * we need to convert from the trace's CPU's XSAVE layout to our layout. */ XSAVE, /** * Stores the content of the NT_FPREGS regset. The format depends on the * architecture. It is given by Arch::user_fpregs_struct for the appropriate * architecture. */ NT_FPR }; // Set values from raw data, with the given XSAVE layout. Returns false // if this could not be done. bool set_to_raw_data(SupportedArch a, Format format, const uint8_t* data, size_t data_size, const XSaveLayout& layout); Format format() const { return format_; } SupportedArch arch() const { return arch_; } const std::vector data() const { return data_; } int data_size() const { return data_.size(); } const uint8_t* data_bytes() const { return data_.data(); } bool empty() const { return data_.empty(); } /** * Read XSAVE `xinuse` field */ uint64_t read_xinuse(bool* defined) const; /** * Read FIP field */ uint64_t read_fip(bool* defined) const; /** * Read FOP field */ uint16_t read_fop(bool* defined) const; /** * Read MXCSR field */ uint32_t read_mxcsr(bool* defined) const; /** * Clear FIP and FDP registers if they're present. * Returns true if the registers changed. */ bool clear_fip_fdp(); /** * Like |Registers::read_register()|, except attempts to read * the value of an "extra register" (floating point / vector). */ size_t read_register(uint8_t* buf, GdbRegister regno, bool* defined) const; /** * Like |Registers::write_register()|, except attempts to write * the value of an "extra register" (floating point / vector). */ bool write_register(GdbRegister regno, const void* value, size_t value_size); /** * Get a user_fpregs_struct for a particular Arch from these ExtraRegisters. */ std::vector get_user_fpregs_struct(SupportedArch arch) const; /** * Update registers from a user_fpregs_struct. */ void set_user_fpregs_struct(Task* t, SupportedArch arch, void* data, size_t size); /** * Get a user_fpxregs_struct for from these ExtraRegisters. */ X86Arch::user_fpxregs_struct get_user_fpxregs_struct() const; /** * Update registers from a user_fpxregs_struct. */ void set_user_fpxregs_struct(Task* t, const X86Arch::user_fpxregs_struct& regs); void print_register_file_compact(FILE* f) const; /** * Reset to post-exec initial state */ void reset(); void validate(Task* t); /** * Return true if |reg1| matches |reg2|. Passing EXPECT_MISMATCHES * indicates that the caller is using this as a general register * compare and nothing special should be done if the register files * mismatch. Passing LOG_MISMATCHES will log the registers that don't * match. Passing BAIL_ON_MISMATCH will additionally abort on * mismatch. 
* This is conservative; we only return false if we have enough * information to verify that the registers definitely don't match. * The register files must have the same arch. */ Registers::Comparison compare_with(const ExtraRegisters& reg2) const { Registers::Comparison result; compare_internal(reg2, result); return result; } bool matches(const ExtraRegisters& reg2) const { Registers::Comparison result; result.store_mismatches = false; compare_internal(reg2, result); return !result.mismatch_count; } private: friend class Task; void compare_internal(const ExtraRegisters& reg2, Registers::Comparison& result) const; Format format_; SupportedArch arch_; std::vector data_; }; } // namespace rr #endif /* RR_EXTRA_REGISTERS_H_ */ rr-5.7.0/src/FdTable.cc000066400000000000000000000166071450675474200145750ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "FdTable.h" #include #include #include #include "rr/rr.h" #include "AddressSpace.h" #include "RecordTask.h" #include "ReplayTask.h" #include "Session.h" #include "core.h" #include "log.h" using namespace std; namespace rr { FdTable::shr_ptr FdTable::create(Task* t) { shr_ptr fds(new FdTable(t->session().syscallbuf_fds_disabled_size())); fds->insert_task(t); return fds; } void FdTable::insert_task(Task* t) { HasTaskSet::insert_task(t); ++vms[t->vm().get()]; } void FdTable::erase_task(Task* t) { if (task_set().find(t) == task_set().end()) { return; } HasTaskSet::erase_task(t); auto it = vms.find(t->vm().get()); if (it == vms.end()) { FATAL() << "Lost track of VM already?"; } --it->second; if (!it->second) { vms.erase(it); } } void FdTable::add_monitor(Task* t, int fd, FileMonitor* monitor) { // In the future we could support multiple monitors on an fd, but we don't // need to yet. 
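  // Illustrative call site (hypothetical; not part of this file), assuming
  // rr's StdioMonitor as the monitor implementation:
  //
  //   FdTable::shr_ptr table = FdTable::create(t);
  //   table->add_monitor(t, STDERR_FILENO, new StdioMonitor(STDERR_FILENO));
  //   // Calling add_monitor() again on the same fd would trip the ASSERT
  //   // below; use replace_monitor() to swap an already-installed monitor.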
FileMonitor* current = get_monitor(fd); if (current) { ASSERT(t, false) << "Task " << t->rec_tid << " already monitoring fd " << fd << " " << file_monitor_type_name(current->type()); } if (fd >= syscallbuf_fds_disabled_size && fds.count(fd) == 0) { fd_count_beyond_limit++; } fds[fd] = FileMonitor::shr_ptr(monitor); update_syscallbuf_fds_disabled(fd); } void FdTable::replace_monitor(Task* t, int fd, FileMonitor* monitor) { if (!is_monitoring(fd)) { add_monitor(t, fd, monitor); } else { fds[fd] = FileMonitor::shr_ptr(monitor); } } bool FdTable::is_rr_fd(int fd) { auto it = fds.find(fd); if (it == fds.end()) { return false; } return it->second->is_rr_fd(); } bool FdTable::emulate_ioctl(int fd, RecordTask* t, uint64_t* result) { auto it = fds.find(fd); if (it == fds.end()) { return false; } return it->second->emulate_ioctl(t, result); } bool FdTable::emulate_fcntl(int fd, RecordTask* t, uint64_t* result) { auto it = fds.find(fd); if (it == fds.end()) { return false; } return it->second->emulate_fcntl(t, result); } bool FdTable::emulate_read(int fd, RecordTask* t, const std::vector& ranges, FileMonitor::LazyOffset& offset, uint64_t* result) { auto it = fds.find(fd); if (it == fds.end()) { return false; } return it->second->emulate_read(t, ranges, offset, result); } void FdTable::filter_getdents(int fd, RecordTask* t) { auto it = fds.find(fd); if (it == fds.end()) { return; } it->second->filter_getdents(t); } Switchable FdTable::will_write(Task* t, int fd) { auto it = fds.find(fd); if (it == fds.end()) { return ALLOW_SWITCH; } return it->second->will_write(t); } void FdTable::did_write(Task* t, int fd, const std::vector& ranges, FileMonitor::LazyOffset& offset) { auto it = fds.find(fd); if (it != fds.end()) { it->second->did_write(t, ranges, offset); } } void FdTable::did_dup(FdTable* table, int from, int to) { if (table->fds.count(from)) { if (to >= syscallbuf_fds_disabled_size && fds.count(to) == 0) { fd_count_beyond_limit++; } fds[to] = table->fds[from]; } else { if (to >= syscallbuf_fds_disabled_size && fds.count(to) > 0) { fd_count_beyond_limit--; } fds.erase(to); } update_syscallbuf_fds_disabled(to); } void FdTable::did_close(int fd) { LOG(debug) << "Close fd " << fd; if (fd >= syscallbuf_fds_disabled_size && fds.count(fd) > 0) { fd_count_beyond_limit--; } fds.erase(fd); update_syscallbuf_fds_disabled(fd); } FileMonitor* FdTable::get_monitor(int fd) { auto it = fds.find(fd); if (it == fds.end()) { return nullptr; } return it->second.get(); } static syscallbuf_fd_classes join_fd_classes_over_tasks(AddressSpace* vm, int fd, int syscallbuf_fds_disabled_size) { syscallbuf_fd_classes cls = FD_CLASS_UNTRACED; for (Task* t : vm->task_set()) { auto table = t->fd_table(); if (table->is_monitoring(fd)) { if (cls != FD_CLASS_UNTRACED) { return FD_CLASS_TRACED; } cls = table->get_monitor(fd)->get_syscallbuf_class(); } else if (fd >= syscallbuf_fds_disabled_size - 1 && table->count_beyond_limit() > 0) { return FD_CLASS_TRACED; } } return cls; } void FdTable::update_syscallbuf_fds_disabled(int fd) { DEBUG_ASSERT(fd >= 0); DEBUG_ASSERT(task_set().size() > 0); // It's possible for tasks with different VMs to share this fd table. // But tasks with the same VM might have different fd tables... 
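  // The rule implemented by join_fd_classes_over_tasks() above, roughly:
  //   - no table monitoring the fd              -> FD_CLASS_UNTRACED
  //   - exactly one table monitoring it         -> that monitor's class
  //   - more than one table monitoring it, or   -> FD_CLASS_TRACED
  //     an fd in the clamped overflow slot while
  //     some table has fds beyond the limit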
for (auto address_space : vms) { RecordTask* rt = nullptr; if (address_space.first->task_set().empty()) { FATAL() << "Address space must have at least one task"; } for (Task* t : address_space.first->task_set()) { if (!t->session().is_recording()) { // We could return but we want to check that all our // AddressSpaces have tasks (i.e. aren't dead/dangling) break; } rt = static_cast(t); if (!rt->already_exited()) { break; } rt = nullptr; } if (rt && !rt->preload_globals.is_null()) { if (fd >= syscallbuf_fds_disabled_size) { fd = syscallbuf_fds_disabled_size - 1; } char disable = (char)join_fd_classes_over_tasks(address_space.first, fd, syscallbuf_fds_disabled_size); auto addr = REMOTE_PTR_FIELD(rt->preload_globals, syscallbuf_fd_class[0]) + fd; rt->write_mem(addr, disable); rt->record_local(addr, &disable); } } } void FdTable::init_syscallbuf_fds_disabled(Task* t) { if (!t->session().is_recording()) { return; } RecordTask* rt = static_cast(t); ASSERT(rt, has_task(rt)); if (rt->preload_globals.is_null()) { return; } char disabled[syscallbuf_fds_disabled_size]; memset(disabled, 0, sizeof(disabled)); // It's possible that some tasks in this address space have a different // FdTable. We need to disable syscallbuf for an fd if any tasks for this // address space are monitoring the fd. for (Task* vm_t : rt->vm()->task_set()) { for (auto& it : vm_t->fd_table()->fds) { int fd = it.first; DEBUG_ASSERT(fd >= 0); if (fd >= syscallbuf_fds_disabled_size) { fd = syscallbuf_fds_disabled_size - 1; } if (disabled[fd] == FD_CLASS_UNTRACED) { disabled[fd] = it.second->get_syscallbuf_class(); } else { disabled[fd] = FD_CLASS_TRACED; } } } auto addr = REMOTE_PTR_FIELD(t->preload_globals, syscallbuf_fd_class[0]); rt->write_mem(addr, disabled, syscallbuf_fds_disabled_size); rt->record_local(addr, disabled, syscallbuf_fds_disabled_size); } void FdTable::close_after_exec(ReplayTask* t, const vector& fds_to_close) { ASSERT(t, has_task(t)); for (auto fd : fds_to_close) { did_close(fd); } } static bool is_fd_open(Task* t, int fd) { char path[PATH_MAX]; sprintf(path, "/proc/%d/fd/%d", t->tid, fd); struct stat st; return 0 == lstat(path, &st); } vector FdTable::fds_to_close_after_exec(RecordTask* t) { ASSERT(t, has_task(t)); vector fds_to_close; for (auto& it : fds) { if (!is_fd_open(t, it.first)) { fds_to_close.push_back(it.first); } } for (auto fd : fds_to_close) { did_close(fd); } return fds_to_close; } } // namespace rr rr-5.7.0/src/FdTable.h000066400000000000000000000071671450675474200144400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FD_TABLE_H_ #define RR_FD_TABLE_H_ #include #include #include #include "FileMonitor.h" #include "HasTaskSet.h" namespace rr { class AddressSpace; class RecordTask; class ReplayTask; class Task; class FdTable final : public HasTaskSet { public: typedef std::shared_ptr shr_ptr; void add_monitor(Task* t, int fd, FileMonitor* monitor); void replace_monitor(Task* t, int fd, FileMonitor* monitor); bool emulate_ioctl(int fd, RecordTask* t, uint64_t* result); bool emulate_fcntl(int fd, RecordTask* t, uint64_t* result); bool emulate_read(int fd, RecordTask* t, const std::vector& ranges, FileMonitor::LazyOffset& offset, uint64_t* result); void filter_getdents(int fd, RecordTask* t); bool is_rr_fd(int fd); Switchable will_write(Task* t, int fd); void did_write(Task* t, int fd, const std::vector& ranges, FileMonitor::LazyOffset& offset); void did_dup(int from, int to) { did_dup(this, from, to); } void did_dup(FdTable* 
table, int from, int to); void did_close(int fd); shr_ptr clone() const { return shr_ptr(new FdTable(*this)); } static shr_ptr create(Task* t); bool is_monitoring(int fd) const { return fds.count(fd) > 0; } uint32_t count_beyond_limit() const { return fd_count_beyond_limit; } FileMonitor* get_monitor(int fd); /** * Regenerate syscallbuf_fds_disabled in task |t|. * Called during initialization of the preload library. */ void init_syscallbuf_fds_disabled(Task* t); /** * Get list of fds that have been closed after |t| has done an execve. * Rather than tracking CLOEXEC flags (which would be complicated), we just * scan /proc//fd during recording and note any monitored fds that have * been closed. * This also updates our table to match reality. */ std::vector fds_to_close_after_exec(RecordTask* t); /** * Close fds in list after an exec. */ void close_after_exec(ReplayTask* t, const std::vector& fds_to_close); // Used to optimize ReplayTask's find_free_file_descriptor int last_free_fd() const { return last_free_fd_; } void set_last_free_fd(int last_free_fd) { last_free_fd_ = last_free_fd; } void insert_task(Task* t) override; void erase_task(Task* t) override; private: explicit FdTable(uint32_t syscallbuf_fds_disabled_size) : syscallbuf_fds_disabled_size(syscallbuf_fds_disabled_size), fd_count_beyond_limit(0), last_free_fd_(0) {} // Does not call the base-class copy constructor because // we don't want to copy the task set; the new FdTable will // be for new tasks. FdTable(const FdTable& other) : fds(other.fds), syscallbuf_fds_disabled_size(other.syscallbuf_fds_disabled_size), fd_count_beyond_limit(other.fd_count_beyond_limit), last_free_fd_(other.last_free_fd_) {} void update_syscallbuf_fds_disabled(int fd); std::unordered_map fds; std::unordered_map vms; // Currently this is only used during recording, so we could use // SYSCALLBUF_FDS_DISABLED_SIZE directly and not bother tracking it in // the trace header, but to make things less fragile in case we ever need to // know it during replay, we track it here. int syscallbuf_fds_disabled_size; // Number of elements of `fds` that are >= syscallbuf_fds_disabled_size. // Only used during recording. uint32_t fd_count_beyond_limit; // Only used during recording. 
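  // Bookkeeping sketch for this counter (illustrative; N stands for the
  // assumed syscallbuf_fds_disabled_size limit):
  //   did_dup(3, N + 10) with fd 3 monitored  -> counter incremented
  //   did_close(N + 10)                       -> counter decremented
  // A nonzero count is what makes join_fd_classes_over_tasks() force the
  // shared "overflow" class slot to FD_CLASS_TRACED.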
int last_free_fd_; }; } // namespace rr #endif /* RR_FD_TABLE_H_ */ rr-5.7.0/src/FileMonitor.cc000066400000000000000000000064761450675474200155260ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "FileMonitor.h" #include #include #include #include "RecordTask.h" #include "ReplayTask.h" #include "Session.h" #include "log.h" namespace rr { using namespace std; template static bool is_implicit_offset_syscall_arch(int syscallno) { return syscallno == Arch::writev || syscallno == Arch::write || syscallno == Arch::readv || syscallno == Arch::read; } template static bool is_write_syscall_arch(int syscallno) { return syscallno == Arch::writev || syscallno == Arch::write || syscallno == Arch::pwrite64 || syscallno == Arch::pwritev; } static bool is_implicit_offset_syscall(SupportedArch arch, int syscallno) { RR_ARCH_FUNCTION(is_implicit_offset_syscall_arch, arch, syscallno); } template static int64_t retrieve_offset_arch(Task* t, int syscallno, const Registers& regs) { switch (syscallno) { case Arch::pwrite64: case Arch::pwritev: case Arch::pread64: case Arch::preadv: { if (sizeof(typename Arch::unsigned_word) == 4) { return regs.arg4() | (uint64_t(regs.arg5_signed()) << 32); } return regs.arg4_signed(); } case Arch::readv: case Arch::read: case Arch::writev: case Arch::write: { ASSERT(t, t->session().is_recording()) << "Can only read a file descriptor's offset while recording"; int fd = regs.orig_arg1_signed(); int64_t offset = t->fd_offset(fd); return is_write_syscall_arch(syscallno) ? // The pos we just read, was after the write completed. Luckily, we do // know how many bytes were written. offset - regs.syscall_result() : offset; } default: { ASSERT(t, false) << "Can not retrieve offset for this system call."; return -1; } } } static int64_t retrieve_offset(Task* t, int syscallno, const Registers& regs) { RR_ARCH_FUNCTION(retrieve_offset_arch, t->arch(), t, syscallno, regs); } int64_t FileMonitor::LazyOffset::retrieve(bool needed_for_replay) { bool is_replay = t->session().is_replaying(); bool is_implicit_offset = is_implicit_offset_syscall(t->arch(), syscallno); ASSERT(t, needed_for_replay || !is_replay); // There is no way we can figure out this information now, so retrieve it // from the trace (we record it below under the same circumstance). 
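    // Timeline for the implicit-offset syscalls (sketch of the flow below):
    //   record: the live offset comes from t->fd_offset(fd) inside
    //           retrieve_offset() and is stashed in the syscall event below.
    //   replay: the fd's offset can't be recovered from the tracee, so the
    //           stashed value is read back from the current trace frame here.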
if (is_replay && is_implicit_offset) { return static_cast(t) ->current_trace_frame() .event() .Syscall() .write_offset; } int64_t offset = retrieve_offset(t, syscallno, regs); if (needed_for_replay && is_implicit_offset) { static_cast(t)->ev().Syscall().write_offset = offset; } return offset; } #define CASE(v) case FileMonitor::v: return #v std::string file_monitor_type_name(FileMonitor::Type t) { switch (t) { CASE(Base); CASE(MagicSaveData); CASE(Mmapped); CASE(Preserve); CASE(ProcFd); CASE(ProcMem); CASE(Stdio); CASE(VirtualPerfCounter); CASE(NonvirtualPerfCounter); CASE(SysCpu); CASE(ProcStat); CASE(RRPage); CASE(ODirect); CASE(BpfMap); CASE(PidFd); default: FATAL() << "Unknown type " << (int)t; return ""; } } } rr-5.7.0/src/FileMonitor.h000066400000000000000000000072471450675474200153650ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FILE_MONITOR_H_ #define RR_FILE_MONITOR_H_ class Task; #include #include #include #include #include "preload/preload_interface.h" #include "util.h" namespace rr { class RecordTask; class Registers; class FileMonitor { public: typedef std::shared_ptr shr_ptr; virtual ~FileMonitor() {} enum Type { Base, MagicSaveData, Mmapped, Preserve, ProcFd, ProcMem, Stdio, VirtualPerfCounter, NonvirtualPerfCounter, SysCpu, ProcStat, RRPage, ODirect, BpfMap, PidFd, }; virtual Type type() { return Base; } /** * Overriding this to return true will cause close() (and related fd-smashing * operations such as dup2) to return EBADF, and hide it from the tracee's * /proc/pid/fd/ */ virtual bool is_rr_fd() { return false; } /** * Notification that task |t| is about to write |data| bytes of length * |length| to the file. * In general writes can block, and concurrent blocking writes to the same * file may race so that the kernel performs writes out of order * with respect to will_write notifications. * If it is known that the write cannot block (or that blocking all of rr * on it is OK), this notification can return PREVENT_SWITCH to make the * write a blocking write. This ensures that writes are performed in the order * of will_write notifications. */ virtual Switchable will_write(Task*) { return ALLOW_SWITCH; } /** * Notification that task |t| wrote to the file descriptor. * Due to races, if will_write did not return PREVENT_SWITCH, it's possible * that the data in the buffers is not what was actually written. */ struct Range { remote_ptr data; size_t length; Range(remote_ptr data, size_t length) : data(data), length(length) {} }; /** * Encapsulates the offset at which to read or write. Computing this may be * an expensive operation if the offset is implicit (i.e. is taken from the * file descriptor), so we only do it if we actually need to look at the * offset. */ class LazyOffset { public: LazyOffset(Task* t, const Registers& regs, int64_t syscallno) : t(t), regs(regs), syscallno(syscallno) {} int64_t retrieve(bool needed_for_replay); private: Task* t; const Registers& regs; int64_t syscallno; }; virtual void did_write(Task*, const std::vector&, LazyOffset&) {} /** * Return true if the ioctl should be fully emulated. If so the result * is stored in the last parameter. * Only called during recording. */ virtual bool emulate_ioctl(RecordTask*, uint64_t*) { return false; } /** * Return true if the fcntl should be fully emulated. If so the * result is stored in the last parameter. * Only called during recording. 
*/ virtual bool emulate_fcntl(RecordTask*, uint64_t*) { return false; } /** * Return true if the read should be fully emulated. If so the * result is stored in the last parameter. The emulation should write to the * task's memory ranges. * Only called during recording. */ virtual bool emulate_read(RecordTask*, const std::vector&, LazyOffset&, uint64_t*) { return false; } /** * Allows the FileMonitor to rewrite the output of a getdents/getdents64 call * if desired. */ virtual void filter_getdents(RecordTask*) {} virtual enum syscallbuf_fd_classes get_syscallbuf_class() { return FD_CLASS_TRACED; } }; std::string file_monitor_type_name(FileMonitor::Type t); } // namespace rr #endif /* RR_FILE_MONITOR_H_ */ rr-5.7.0/src/FileNameCommand.cc000066400000000000000000000042571450675474200162510ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include "AddressSpace.h" #include "Command.h" #include "TraceStream.h" #include "core.h" #include "main.h" #include "util.h" using namespace std; namespace rr { class FileNameCommand : public Command { public: virtual int run(vector& args) override; protected: FileNameCommand(const char* name, const char* help) : Command(name, help) {} bool parse_file_name(vector& args, string* out); static FileNameCommand singleton; }; FileNameCommand FileNameCommand::singleton( "filename", " rr filename \n" " Prints the original filename for a given trace file name.\n"); static void print_original_file_name(const string& trace_dir, const string& file_name, FILE* out) { TraceReader trace(trace_dir); unordered_set original_files; string full_file_name = trace.dir() + "/" + file_name; while (true) { TraceReader::MappedData data; bool found; KernelMapping km = trace.read_mapped_region( &data, &found, TraceReader::VALIDATE, TraceReader::ANY_TIME); if (!found) { break; } if (data.source == TraceReader::SOURCE_FILE && data.file_name == full_file_name && !km.fsname().empty() && original_files.find(km.fsname()) == original_files.end()) { fprintf(out, "%s\n", km.fsname().c_str()); original_files.insert(km.fsname()); } } } bool FileNameCommand::parse_file_name(vector& args, string* out) { if (args.empty() || !verify_not_option(args)) { return false; } *out = args[0]; args.erase(args.begin()); return true; } int FileNameCommand::run(vector& args) { string file_name; if (!parse_file_name(args, &file_name) || !args.empty()) { print_help(stderr); return 1; } string trace_dir; size_t last_slash = file_name.rfind('/'); if (last_slash == string::npos) { trace_dir = "."; } else { trace_dir = file_name.substr(0, last_slash); file_name = file_name.substr(last_slash + 1); } print_original_file_name(trace_dir, file_name, stdout); return 0; } } // namespace rr rr-5.7.0/src/Flags.cc000066400000000000000000000003251450675474200143160ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Flags.h" namespace rr { Flags& Flags::get_for_init() { return singleton; } Flags Flags::singleton; } // namespace rr rr-5.7.0/src/Flags.h000066400000000000000000000051511450675474200141620ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FLAGS_H_ #define RR_FLAGS_H_ #include #include #include #include "Ticks.h" #include "TraceFrame.h" namespace rr { /** * Command line arguments for rr */ struct Flags { enum { CHECKSUM_NONE = -3, CHECKSUM_SYSCALL = -2, CHECKSUM_ALL = -1 }; /* When to generate or check memory 
checksums. One of CHECKSUM_NONE, * CHECKSUM_SYSCALL or CHECKSUM_ALL, or a positive integer representing the * event time at which to start checksumming. */ FrameTime checksum; enum { DUMP_ON_ALL = 10000, DUMP_ON_RDTSC = 10001, DUMP_ON_NONE = -DUMP_ON_ALL }; int dump_on; enum { DUMP_AT_NONE = -1 }; /* time at which to create memory dump */ FrameTime dump_at; // global time // Force rr to do some things that it otherwise wouldn't, for // example launching an emergency debugger when the output // doesn't seem to be a tty. bool force_things; // Force rr to assume that the terminal is non-interactive, disabling e.g. // the interactive emergency debugger. If used with --force-things, this // option prevails. bool non_interactive; /* Mark the trace global time along with tracee writes to * stdio. */ bool mark_stdio; // Check that cached mmaps match /proc/maps after each event. bool check_cached_mmaps; // Suppress warnings related to environmental features outside rr's // control. bool suppress_environment_warnings; // Any warning or error that would be printed is treated as fatal bool fatal_errors_and_warnings; // Pretend CPUID faulting support doesn't exist bool disable_cpuid_faulting; // Don't listen for PTRACE_EVENT_EXIT events, to test how rr handles // missing PTRACE_EVENT_EXITs. bool disable_ptrace_exit_events; // User override for architecture detection, e.g. when running // under valgrind. std::string forced_uarch; // User override for the path to page files and other resources. std::string resource_path; Flags() : checksum(CHECKSUM_NONE), dump_on(DUMP_ON_NONE), dump_at(DUMP_AT_NONE), force_things(false), mark_stdio(false), check_cached_mmaps(false), suppress_environment_warnings(false), fatal_errors_and_warnings(false), disable_cpuid_faulting(false), disable_ptrace_exit_events(false) {} static const Flags& get() { return singleton; } /** * Get a reference that can be used to initialize the global Flags. 
*/ static Flags& get_for_init(); private: static Flags singleton; }; } // namespace rr #endif /* RR_FLAGS_H_ */ rr-5.7.0/src/GdbCommand.cc000066400000000000000000000134571450675474200152670ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "GdbCommand.h" #include "ReplayTask.h" #include "log.h" using namespace std; namespace rr { static SimpleGdbCommand elapsed_time( "elapsed-time", "Print elapsed time (in seconds) since the start of the trace, in the" " 'record' timeline.", [](GdbServer&, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } ReplayTask* replay_t = static_cast(t); double elapsed_time = replay_t->current_trace_frame().monotonic_time() - replay_t->session().get_trace_start_time(); return string("Elapsed Time (s): ") + to_string(elapsed_time); }); static SimpleGdbCommand when( "when", "Print the current rr event number.", [](GdbServer&, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } return string("Current event: ") + to_string( static_cast(t)->current_trace_frame().time()); }); static SimpleGdbCommand when_ticks( "when-ticks", "Print the current rr tick count for the current thread.", [](GdbServer&, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } return string("Current tick: ") + to_string(t->tick_count()); }); static SimpleGdbCommand when_tid( "when-tid", "Print the real tid for the current thread.", [](GdbServer&, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } return string("Current tid: ") + to_string(t->tid); }); static std::vector back_stack; static ReplayTimeline::Mark current_history_cp; static std::vector forward_stack; static SimpleGdbCommand rr_history_push( "rr-history-push", "Push an entry into the rr history.", [](GdbServer& gdb_server, Task* t, const vector&) { if (!t->session().is_replaying()) { // Don't create new history state inside a diversion return string(); } if (current_history_cp) { back_stack.push_back(current_history_cp); } current_history_cp = gdb_server.get_timeline().mark(); forward_stack.clear(); return string(); }); static SimpleGdbCommand back( "back", "Go back one entry in the rr history.", [](GdbServer& gdb_server, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } if (back_stack.size() == 0) { return string("Can't go back. No more history entries."); } forward_stack.push_back(current_history_cp); current_history_cp = back_stack.back(); back_stack.pop_back(); gdb_server.get_timeline().seek_to_mark(current_history_cp); return string(); }); static SimpleGdbCommand forward( "forward", "Go forward one entry in the rr history.", [](GdbServer& gdb_server, Task* t, const vector&) { if (!t->session().is_replaying()) { return GdbCommandHandler::cmd_end_diversion(); } if (forward_stack.size() == 0) { return string("Can't go forward. 
No more history entries."); } back_stack.push_back(current_history_cp); current_history_cp = forward_stack.back(); forward_stack.pop_back(); gdb_server.get_timeline().seek_to_mark(current_history_cp); return string(); }); static int gNextCheckpointId = 0; string invoke_checkpoint(GdbServer& gdb_server, Task*, const vector& args) { const string& where = args[1]; int checkpoint_id = ++gNextCheckpointId; GdbServer::Checkpoint::Explicit e; if (gdb_server.timeline.can_add_checkpoint()) { e = GdbServer::Checkpoint::EXPLICIT; } else { e = GdbServer::Checkpoint::NOT_EXPLICIT; } gdb_server.checkpoints[checkpoint_id] = GdbServer::Checkpoint( gdb_server.timeline, gdb_server.last_continue_tuid, e, where); return string("Checkpoint ") + to_string(checkpoint_id) + " at " + where; } static SimpleGdbCommand checkpoint( "checkpoint", "create a checkpoint representing a point in the execution\n" "use the 'restart' command to return to the checkpoint", invoke_checkpoint); string invoke_delete_checkpoint(GdbServer& gdb_server, Task*, const vector& args) { int id = stoi(args[1]); auto it = gdb_server.checkpoints.find(id); if (it != gdb_server.checkpoints.end()) { if (it->second.is_explicit == GdbServer::Checkpoint::EXPLICIT) { gdb_server.timeline.remove_explicit_checkpoint(it->second.mark); } gdb_server.checkpoints.erase(it); return string("Deleted checkpoint ") + to_string(id) + "."; } else { return string("No checkpoint number ") + to_string(id) + "."; } } static SimpleGdbCommand delete_checkpoint( "delete checkpoint", "remove a checkpoint created with the 'checkpoint' command", invoke_delete_checkpoint); string invoke_info_checkpoints(GdbServer& gdb_server, Task*, const vector&) { if (gdb_server.checkpoints.size() == 0) { return "No checkpoints."; } string out = "ID\tWhen\tWhere"; for (auto& c : gdb_server.checkpoints) { out += string("\n") + to_string(c.first) + "\t" + to_string(c.second.mark.time()) + "\t" + c.second.where; } return out; } static SimpleGdbCommand info_checkpoints( "info checkpoints", "list all checkpoints created with the 'checkpoint' command", invoke_info_checkpoints); /*static*/ void GdbCommand::init_auto_args() { checkpoint.add_auto_arg("rr-where"); } } // namespace rr rr-5.7.0/src/GdbCommand.h000066400000000000000000000041751450675474200151260ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_COMMAND_H_ #define RR_GDB_COMMAND_H_ #include "GdbCommandHandler.h" #include "GdbServer.h" #include #include #include namespace rr { class GdbCommand { protected: GdbCommand(const std::string& cmd_name, const std::string& documentation) : cmd_name(cmd_name), documentation(documentation) { GdbCommandHandler::register_command(*this); } public: virtual ~GdbCommand() {} const std::string& name() const { return cmd_name; } const std::string& docs() const { return documentation; } /** * Handle the RR Cmd and return a string response to be echo'd * to the user. * * NOTE: args[0] is the command name */ virtual std::string invoke(GdbServer& gdb_server, Task* t, const std::vector& args) = 0; /** * When called, gdb will automatically run gdb.execute() on this string and * pass it as an argument to the rr command. This is useful to pass gdb * state alongside the command invocation. */ void add_auto_arg(const std::string& auto_arg) { cmd_auto_args.push_back(auto_arg); } const std::vector& auto_args() const { return cmd_auto_args; } /** * Setup all the automatic auto_args for our commands. 
*/ static void init_auto_args(); private: const std::string cmd_name; const std::string documentation; std::vector cmd_auto_args; }; class SimpleGdbCommand : public GdbCommand { public: SimpleGdbCommand( const std::string& cmd_name, const std::string& documentation, const std::function&)>& invoker) : GdbCommand(cmd_name, documentation), invoker(invoker) {} virtual std::string invoke(GdbServer& gdb_server, Task* t, const std::vector& args) override { return invoker(gdb_server, t, args); } std::function&)> invoker; }; } // namespace rr #endif rr-5.7.0/src/GdbCommandHandler.cc000066400000000000000000000170401450675474200165550ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "GdbCommandHandler.h" #include "GdbCommand.h" #include "log.h" #include #include using namespace std; namespace rr { // HashMap would be better here but the unordered_map API is annoying // and linear search is fine. static vector* gdb_command_list; static string gdb_macro_binding(const GdbCommand& cmd) { string auto_args_str = "["; for (size_t i = 0; i < cmd.auto_args().size(); i++) { if (i > 0) { auto_args_str += ", "; } auto_args_str += "'" + cmd.auto_args()[i] + "'"; } auto_args_str += "]"; string ret = "python RRCmd('" + cmd.name() + "', " + auto_args_str + ")\n"; if (!cmd.docs().empty()) { ret += "document " + cmd.name() + "\n" + cmd.docs() + "\nend\n"; } return ret; } /* static */ string GdbCommandHandler::gdb_macros() { GdbCommand::init_auto_args(); stringstream ss; ss << string(R"Delimiter( set python print-stack full python import re def gdb_unescape(string): str_len = len(string) if str_len % 2: # check for unexpected string length return "" result = bytearray() try: pos = 0 while pos < str_len: hex_char = string[pos:pos+2] result.append(int(hex_char, 16)) pos += 2 except: # check for unexpected string value return "" return result.decode('utf-8') def gdb_escape(string): result = "" for curr_char in string.encode('utf-8'): result += format(curr_char, '02x') return result class RRWhere(gdb.Command): """Helper to get the location for checkpoints/history. Used by auto-args""" def __init__(self): gdb.Command.__init__(self, 'rr-where', gdb.COMMAND_USER, gdb.COMPLETE_NONE, False) def invoke(self, arg, from_tty): #Get the symbol name from 'frame 0' in the format: # '#0 0x00007f9d81a04c46 in _dl_start (arg=0x7ffee1f1c740) at rtld.c:356 # 356 in rtld.c' try: rv = gdb.execute('frame 0', to_string=True) except: rv = "???" # This may occurs if we're not running m = re.match("#0\w*(.*)", rv); if m: rv = m.group(1) else: rv = rv + "???" gdb.write(rv) RRWhere() class RRDenied(gdb.Command): """Helper to prevent use of breaking commands. 
Used by auto-args""" def __init__(self): gdb.Command.__init__(self, 'rr-denied', gdb.COMMAND_USER, gdb.COMPLETE_NONE, False) def invoke(self, arg, from_tty): raise gdb.GdbError("Execution of '" + arg + "' is not possible in recorded executions.") RRDenied() class RRCmd(gdb.Command): def __init__(self, name, auto_args): gdb.Command.__init__(self, name, gdb.COMMAND_USER, gdb.COMPLETE_NONE, False) self.cmd_name = name self.auto_args = auto_args def invoke(self, arg, from_tty): args = gdb.string_to_argv(arg) self.rr_cmd(args) def rr_cmd(self, args): cmd_prefix = "maint packet qRRCmd:" + gdb_escape(self.cmd_name) argStr = "" for auto_arg in self.auto_args: argStr += ":" + gdb_escape(gdb.execute(auto_arg, to_string=True)) for arg in args: argStr += ":" + gdb_escape(arg) rv = gdb.execute(cmd_prefix + argStr, to_string=True); rv_match = re.search('received: "(.*)"', rv, re.MULTILINE); if not rv_match: gdb.write("Response error: " + rv) return response = gdb_unescape(rv_match.group(1)) if response != '\n': gdb.write(response) def history_push(p): # ensure any output (e.g. produced by breakpoint commands running during our # processing, that were triggered by the stop we've been notified for) # is echoed as normal. gdb.execute("rr-history-push") rr_suppress_run_hook = False class RRHookRun(gdb.Command): def __init__(self): gdb.Command.__init__(self, 'rr-hook-run', gdb.COMMAND_USER, gdb.COMPLETE_NONE, False) def invoke(self, arg, from_tty): thread = int(gdb.parse_and_eval("$_thread")) if thread != 0 and not rr_suppress_run_hook: gdb.execute("stepi") class RRSetSuppressRunHook(gdb.Command): def __init__(self): gdb.Command.__init__(self, 'rr-set-suppress-run-hook', gdb.COMMAND_USER, gdb.COMPLETE_NONE, False) def invoke(self, arg, from_tty): rr_suppress_run_hook = arg == '1' RRHookRun() RRSetSuppressRunHook() #Automatically push an history entry when the program execution stops #(signal, breakpoint).This is fired before an interactive prompt is shown. #Disabled for now since it's not fully working. 
gdb.events.stop.connect(history_push) end )Delimiter"); if (gdb_command_list) { for (auto& it : *gdb_command_list) { ss << gdb_macro_binding(*it); } } ss << string(R"Delimiter( define hookpost-back maintenance flush register-cache frame end define hookpost-forward maintenance flush register-cache frame end )Delimiter"); return ss.str(); } /*static*/ GdbCommand* GdbCommandHandler::command_for_name(const string& name) { if (!gdb_command_list) { return nullptr; } for (auto& it : *gdb_command_list) { if (it->name() == name) { return it; } } return nullptr; } void GdbCommandHandler::register_command(GdbCommand& cmd) { LOG(debug) << "registering command: " << cmd.name(); if (!gdb_command_list) { gdb_command_list = new vector(); } gdb_command_list->push_back(&cmd); } // applies the simplest two hex character by byte encoding static string gdb_escape(const string& str) { stringstream ss; ss << hex; const size_t len = str.size(); const char *data = str.data(); for (size_t i = 0; i < len; i++) { int chr = (uint8_t)data[i]; if (chr < 16) { ss << "0"; } ss << chr; } return ss.str(); } // undo the two hex character byte encoding, // in case of error returns an empty string static string gdb_unescape(const string& str) { const size_t len = str.size(); // check for unexpected string length if (len % 2) { return ""; } stringstream ss; for (size_t i = 0; i < len; i += 2) { string substr = str.substr(i, 2); const char *hex_str = substr.c_str(); char *ptr = nullptr; ss << (char)strtoul(hex_str, &ptr, 16); // check for unexpected character if (*ptr) { return ""; } } return ss.str(); } static vector parse_cmd(string& str) { vector args; size_t pos = 0; string delimiter = ":"; while ((pos = str.find(delimiter)) != string::npos) { args.push_back(gdb_unescape(str.substr(0, pos))); str.erase(0, pos + delimiter.length()); } args.push_back(gdb_unescape(str)); return args; } /* static */ string GdbCommandHandler::process_command(GdbServer& gdb_server, Task* t, string payload) { const vector args = parse_cmd(payload); GdbCommand* cmd = command_for_name(args[0]); if (!cmd) { return gdb_escape(string() + "Command '" + args[0] + "' not found.\n"); } LOG(debug) << "invoking command: " << cmd->name(); string resp = cmd->invoke(gdb_server, t, args); if (resp == GdbCommandHandler::cmd_end_diversion()) { LOG(debug) << "cmd must run outside of diversion (" << resp << ")"; return resp; } LOG(debug) << "cmd response: " << resp; return gdb_escape(resp + "\n"); } } // namespace rr rr-5.7.0/src/GdbCommandHandler.h000066400000000000000000000020541450675474200164160ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_COMMAND_HANDLER_H_ #define RR_GDB_COMMAND_HANDLER_H_ #include namespace rr { class GdbCommand; class GdbServer; class Task; class GdbCommandHandler { public: // Declare any registered command with supporting // wrapper code. static std::string gdb_macros(); static void register_command(GdbCommand& cmd); /** * Process an incoming GDB payload of the following form: * :::... * * NOTE: RR Command are typically sent with the qRRCmd: prefix which * should of been striped already. 
*/ static std::string process_command(GdbServer& gdb_server, Task* t, std::string payload); static GdbCommand* command_for_name(const std::string& name); /** * Special return value for commands that immediatly end a diversion session */ static std::string cmd_end_diversion() { return std::string("RRCmd_EndDiversion"); } private: }; } // namespace rr #endif rr-5.7.0/src/GdbConnection.cc000066400000000000000000001461731450675474200160120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #define REVERSE_EXECUTION /** * Much of this implementation is based on the documentation at * * http://sourceware.org/gdb/onlinedocs/gdb/Packets.html */ #include "GdbConnection.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "GdbCommandHandler.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "core.h" #include "log.h" using namespace std; namespace rr { static const char INTERRUPT_CHAR = '\x03'; #define UNHANDLED_REQ() \ write_packet(""); \ LOG(info) const GdbThreadId GdbThreadId::ANY(0, 0); const GdbThreadId GdbThreadId::ALL(-1, -1); #ifdef DEBUG static bool request_needs_immediate_response(const GdbRequest* req) { switch (req->type) { case DREQ_NONE: case DREQ_CONT: return false; default: return true; } } #endif GdbConnection::GdbConnection(pid_t tgid, const Features& features) : tgid(tgid), cpu_features_(0), no_ack(false), features_(features), connection_alive_(true) { #ifndef REVERSE_EXECUTION features_.reverse_execution = false; #endif } void GdbConnection::await_debugger(ScopedFd& listen_fd) { sock_fd = ScopedFd(accept(listen_fd, nullptr, nullptr)); // We might restart this debugging session, so don't set the // socket fd CLOEXEC. } /** * Poll for data to or from gdb, waiting |timeoutMs|. 0 means "don't * wait", and -1 means "wait forever". Return true if data is ready. */ static bool poll_socket(const ScopedFd& sock_fd, short events, int timeoutMs) { struct pollfd pfd; memset(&pfd, 0, sizeof(pfd)); pfd.fd = sock_fd; pfd.events = events; int ret = poll(&pfd, 1, timeoutMs); if (ret < 0 && errno != EINTR) { LOG(info) << "gdb socket has been closed"; } return ret > 0; } static bool poll_incoming(const ScopedFd& sock_fd, int timeoutMs) { return poll_socket(sock_fd, POLLIN /* TODO: |POLLERR */, timeoutMs); } static void poll_outgoing(const ScopedFd& sock_fd, int timeoutMs) { poll_socket(sock_fd, POLLOUT /* TODO: |POLLERR */, timeoutMs); } /** * read() incoming data exactly one time, successfully. May block. */ void GdbConnection::read_data_once() { ssize_t nread; /* Wait until there's data, instead of busy-looping on * EAGAIN. 
*/ poll_incoming(sock_fd, -1 /* wait forever */); uint8_t buf[4096]; nread = read(sock_fd, buf, sizeof(buf)); if (nread <= 0) { LOG(info) << "Could not read data from gdb socket, " "marking connection as closed"; connection_alive_ = false; } else { inbuf.insert(inbuf.end(), buf, buf + nread); } } void GdbConnection::write_flush() { size_t write_index = 0; outbuf.push_back(0); LOG(debug) << "write_flush: '" << outbuf.data() << "'"; outbuf.pop_back(); while (write_index < outbuf.size()) { ssize_t nwritten; poll_outgoing(sock_fd, -1 /*wait forever*/); nwritten = write(sock_fd, outbuf.data() + write_index, outbuf.size() - write_index); if (nwritten < 0) { LOG(info) << "Could not write data to gdb socket, " "marking connection as closed"; connection_alive_ = false; outbuf.clear(); return; } else { write_index += nwritten; } } outbuf.clear(); } void GdbConnection::write_data_raw(const uint8_t* data, ssize_t len) { outbuf.insert(outbuf.end(), data, data + len); } void GdbConnection::write_hex(unsigned long hex) { char buf[32]; size_t len; len = snprintf(buf, sizeof(buf) - 1, "%02lx", hex); write_data_raw((uint8_t*)buf, len); } void GdbConnection::write_packet_bytes(const uint8_t* data, size_t num_bytes) { uint8_t checksum; size_t i; write_data_raw((uint8_t*)"$", 1); for (i = 0, checksum = 0; i < num_bytes; ++i) { checksum += data[i]; } write_data_raw((uint8_t*)data, num_bytes); write_data_raw((uint8_t*)"#", 1); write_hex(checksum); } void GdbConnection::write_packet(const char* data) { return write_packet_bytes((const uint8_t*)data, strlen(data)); } void GdbConnection::write_binary_packet(const char* pfx, const uint8_t* data, ssize_t num_bytes) { ssize_t pfx_num_chars = strlen(pfx); vector buf; buf.resize(2 * num_bytes + pfx_num_chars); ssize_t buf_num_bytes = 0; int i; memcpy((char*)buf.data(), pfx, pfx_num_chars); buf_num_bytes += pfx_num_chars; for (i = 0; i < num_bytes; ++i) { uint8_t b = data[i]; if (buf_num_bytes + 2 > ssize_t(buf.size())) { break; } switch (b) { case '#': case '$': case '}': case '*': buf.data()[buf_num_bytes++] = '}'; buf.data()[buf_num_bytes++] = b ^ 0x20; break; default: buf.data()[buf_num_bytes++] = b; break; } } LOG(debug) << " ***** NOTE: writing binary data, upcoming debug output may " "be truncated"; return write_packet_bytes(buf.data(), buf_num_bytes); } void GdbConnection::write_hex_bytes_packet(const char* prefix, const uint8_t* bytes, size_t len) { if (prefix[0] == '\0' && 0 == len) { write_packet(""); return; } ssize_t pfx_num_chars = strlen(prefix); vector buf; buf.resize(pfx_num_chars + 2 * len + 1); memcpy(buf.data(), prefix, pfx_num_chars); for (size_t i = 0; i < len; ++i) { unsigned long b = bytes[i]; snprintf(&buf.data()[pfx_num_chars + 2 * i], 3, "%02lx", b); } write_packet(buf.data()); } void GdbConnection::write_hex_bytes_packet(const uint8_t* bytes, size_t len) { write_hex_bytes_packet("", bytes, len); } static void parser_assert(bool cond) { if (!cond) { fputs("Failed to parse gdb request\n", stderr); DEBUG_ASSERT(false); exit(2); } } static string decode_ascii_encoded_hex_str(const char* encoded) { ssize_t enc_len = strlen(encoded); parser_assert(enc_len % 2 == 0); string str; for (int i = 0; i < enc_len / 2; ++i) { char enc_byte[] = { encoded[2 * i], encoded[2 * i + 1], '\0' }; char* endp; int c = strtol(enc_byte, &endp, 16); parser_assert(c < 128); str += static_cast(c); } return str; } bool GdbConnection::skip_to_packet_start() { ssize_t end = -1; /* XXX we want memcspn() here ... 
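* For orientation: a complete packet on the wire looks like "$g#67" -- the * payload sits between '$' and '#', followed by two hex digits giving the * modulo-256 sum of the payload bytes ('g' is 0x67); this scan only needs * to find the leading '$' or an interrupt byte.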
*/ for (size_t i = 0; i < inbuf.size(); ++i) { if (inbuf[i] == '$' || inbuf[i] == INTERRUPT_CHAR) { end = i; break; } } if (end < 0) { /* Discard all read bytes, which we don't care * about. */ inbuf.clear(); return false; } /* Discard bytes up to start-of-packet. */ inbuf.erase(inbuf.begin(), inbuf.begin() + end); parser_assert(1 <= inbuf.size()); parser_assert('$' == inbuf[0] || INTERRUPT_CHAR == inbuf[0]); return true; } bool GdbConnection::sniff_packet() { if (skip_to_packet_start()) { /* We've already seen a (possibly partial) packet. */ return true; } parser_assert(inbuf.empty()); return poll_incoming(sock_fd, 0 /*don't wait*/); } void GdbConnection::read_packet() { /* Read and discard bytes until we see the start of a * packet. * * NB: we're ignoring "+/-" responses from gdb. There doesn't * seem to be any sane reason why we would send a damaged * packet to gdb over TCP, then see a "-" reply from gdb and * somehow magically fix our bug that led to the malformed * packet in the first place. */ while (!skip_to_packet_start() && connection_alive_) { read_data_once(); } if (!connection_alive_) { return; } if (inbuf[0] == INTERRUPT_CHAR) { /* Interrupts are kind of an ugly duckling in the gdb * protocol ... */ packetend = 1; return; } /* Read until we see end-of-packet. */ size_t checkedlen = 0; while (true) { uint8_t* p = (uint8_t*)memchr(inbuf.data() + checkedlen, '#', inbuf.size() - checkedlen); if (p) { packetend = p - inbuf.data(); break; } checkedlen = inbuf.size(); read_data_once(); if (!connection_alive_) { return; } } /* NB: we're ignoring the gdb packet checksums here too. If * gdb is corrupted enough to garble a checksum over TCP, it's * not really clear why asking for the packet again might make * the bug go away. */ parser_assert('$' == inbuf[0] && packetend < inbuf.size()); /* Acknowledge receipt of the packet. */ if (!no_ack) { write_data_raw((uint8_t*)"+", 1); write_flush(); } } static void read_binary_data(const uint8_t* payload, const uint8_t* payload_end, vector& data) { data.clear(); while (payload < payload_end) { uint8_t b = *payload++; if ('}' == b) { parser_assert(payload < payload_end); b = 0x20 ^ *payload++; } data.push_back(b); } } /** * Parse and return a gdb thread-id from |str|. |endptr| points to * the character just after the last character in the thread-id. It * may be nullptr. */ static GdbThreadId parse_threadid(const char* str, char** endptr) { GdbThreadId t; char* endp; bool multiprocess = false; if ('p' == *str) { multiprocess = true; ++str; } t.pid = strtol(str, &endp, 16); parser_assert(endp); /* terminators (single process, no PID or TID, depending on 'p' prefix) */ if (*endp == '\0' || *endp == ';' || *endp == ',') { if (multiprocess) { t.tid = -1; } else { t.tid = t.pid; t.pid = -1; } /* multiprocess syntax "." 
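* e.g. "p2a.1f" names tid 0x1f in process 0x2a, while a bare "2a" (no 'p' * prefix) names tid 0x2a via the single-process branch above.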
*/ } else if (*endp == '.') { str = endp + 1; t.tid = strtol(str, &endp, 16); } parser_assert(*endp == '\0' || *endp == ';' || *endp == ','); *endptr = endp; return t; } void GdbConnection::write_xfer_response(const void* data, size_t size, uint64_t offset, uint64_t len) { if (offset > size) { write_packet("E01"); return; } if (offset == size) { write_packet("l"); return; } if (offset + len < size) { write_binary_packet("m", static_cast(data) + offset, len); return; } write_binary_packet("l", static_cast(data) + offset, size - offset); } static string read_target_desc(const char* file_name) { #ifdef __BIONIC__ const char* share_path = "usr/share/rr/"; #else const char* share_path = "share/rr/"; #endif string path = resource_path() + share_path + string(file_name); stringstream ss; FILE* f = fopen(path.c_str(), "r"); if (f == NULL) { FATAL() << "Failed to load target description file: " << file_name; } while (true) { int ch = getc(f); if (ch == EOF) { break; } ss << (char)ch; } fclose(f); return ss.str(); } static const char* target_description_name(uint32_t cpu_features) { // This doesn't scale, but it's what gdb does... switch (cpu_features) { case 0: return "i386-linux.xml"; case GdbConnection::CPU_X86_64: return "amd64-linux.xml"; case GdbConnection::CPU_AVX: return "i386-avx-linux.xml"; case GdbConnection::CPU_X86_64 | GdbConnection::CPU_AVX: return "amd64-avx-linux.xml"; case GdbConnection::CPU_PKU | GdbConnection::CPU_AVX: return "i386-pkeys-linux.xml"; case GdbConnection::CPU_X86_64 | GdbConnection::CPU_PKU | GdbConnection::CPU_AVX: return "amd64-pkeys-linux.xml"; case GdbConnection::CPU_AARCH64: return "aarch64-core.xml"; default: FATAL() << "Unknown features"; return nullptr; } } bool GdbConnection::xfer(const char* name, char* args) { const char* mode = args; args = strchr(args, ':'); parser_assert(args); *args++ = '\0'; if (strcmp(mode, "read") && strcmp(mode, "write")) { write_packet(""); return false; } const char* annex = args; args = strchr(args, ':'); parser_assert(args); *args++ = '\0'; uint64_t offset = strtoul(args, &args, 16); uint64_t len = 0; if (!strcmp(mode, "read")) { parser_assert(',' == *args++); len = strtoul(args, &args, 16); parser_assert(!*args); } else { parser_assert(*args == ':'); ++args; } LOG(debug) << "gdb asks us to transfer " << name << " mode=" << mode << ", annex=" << annex << ", offset=" << offset << " len=" << len; if (!strcmp(name, "auxv")) { if (strcmp(annex, "")) { write_packet("E00"); return false; } if (strcmp(mode, "read")) { write_packet(""); return false; } req = GdbRequest(DREQ_GET_AUXV); req.target = query_thread; // XXX handle offset/len here! return true; } if (!strcmp(name, "exec-file")) { if (strcmp(mode, "read")) { write_packet(""); return false; } req = GdbRequest(DREQ_GET_EXEC_FILE); req.target.pid = req.target.tid = strtoul(annex, nullptr, 16); // XXX handle offset/len here! return true; } if (!strcmp(name, "siginfo")) { if (strcmp(annex, "")) { write_packet("E00"); return false; } if (!strcmp(mode, "read")) { req = GdbRequest(DREQ_READ_SIGINFO); req.target = query_thread; req.mem().addr = offset; req.mem().len = len; return true; } req = GdbRequest(DREQ_WRITE_SIGINFO); req.target = query_thread; return true; } if (!strcmp(name, "features")) { if (strcmp(mode, "read")) { write_packet(""); return false; } string target_desc = read_target_desc((strcmp(annex, "") && strcmp(annex, "target.xml")) ? 
annex : target_description_name(cpu_features_)); write_xfer_response(target_desc.c_str(), target_desc.size(), offset, len); return false; } write_packet(""); return false; } /** * Format |value| into |buf| in the manner gdb expects. |buf| must * point at a buffer with at least |1 + 2*DBG_MAX_REG_SIZE| bytes * available. Fewer bytes than that may be written, but |buf| is * guaranteed to be null-terminated. */ static size_t print_reg_value(const GdbRegisterValue& reg, char* buf) { parser_assert(reg.size <= GdbRegisterValue::MAX_SIZE); if (reg.defined) { /* gdb wants the register value in native endianness. * reg.value read in native endianness is exactly that. */ for (size_t i = 0; i < reg.size; ++i) { snprintf(&buf[2 * i], 3, "%02lx", (unsigned long)reg.value[i]); } } else { for (size_t i = 0; i < reg.size; ++i) { strcpy(&buf[2 * i], "xx"); } } return reg.size * 2; } /** * Read the encoded register value in |strp| into |reg|. |strp| may * be mutated. */ static void read_reg_value(char** strp, GdbRegisterValue* reg) { char* str = *strp; if ('x' == str[0]) { reg->defined = false; reg->size = 0; return; } reg->defined = true; reg->size = strlen(str) / 2; for (size_t i = 0; i < reg->size; ++i) { char tmp = str[2]; str[2] = '\0'; reg->value[i] = strtoul(str, &str, 16); parser_assert('\0' == *str); str[0] = tmp; } *strp = str; } bool GdbConnection::query(char* payload) { const char* name; char* args; args = strchr(payload, ':'); if (args) { *args++ = '\0'; } name = payload; if (strstr(name, "RRCmd") == name) { LOG(debug) << "gdb requests rr cmd: " << name; req = GdbRequest(DREQ_RR_CMD); req.text_ = args; return true; } if (!strcmp(name, "C")) { LOG(debug) << "gdb requests current thread ID"; req = GdbRequest(DREQ_GET_CURRENT_THREAD); return true; } if (!strcmp(name, "Attached")) { LOG(debug) << "gdb asks if this is a new or existing process"; /* Tell gdb this is an existing process; it might be * (see emergency_debug()). */ write_packet("1"); return false; } if (!strcmp(name, "fThreadInfo")) { LOG(debug) << "gdb asks for thread list"; req = GdbRequest(DREQ_GET_THREAD_LIST); return true; } if (!strcmp(name, "sThreadInfo")) { write_packet("l"); /* "end of list" */ return false; } if (!strcmp(name, "GetTLSAddr")) { LOG(debug) << "gdb asks for TLS addr"; req = GdbRequest(DREQ_TLS); req.target = parse_threadid(args, &args); parser_assert(*args == ','); ++args; size_t offset = strtoul(args, &args, 16); parser_assert(*args == ','); ++args; remote_ptr load_module = strtoul(args, &args, 16); parser_assert(*args == '\0'); req.tls().offset = offset; req.tls().load_module = load_module; return true; } if (!strcmp(name, "Offsets")) { LOG(debug) << "gdb asks for section offsets"; req = GdbRequest(DREQ_GET_OFFSETS); req.target = query_thread; return true; } if ('P' == name[0]) { /* The docs say not to use this packet ... 
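* ('P' here is the old "qP" thread-info query; gdb's manual points at * qThreadExtraInfo instead, so the empty "unsupported" reply below is safe.)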
*/ write_packet(""); return false; } if (!strcmp(name, "Supported")) { /* TODO process these */ LOG(debug) << "gdb supports " << args; multiprocess_supported_ = strstr(args, "multiprocess+") != nullptr; hwbreak_supported_ = strstr(args, "hwbreak+") != nullptr; swbreak_supported_ = strstr(args, "swbreak+") != nullptr; stringstream supported; // Encourage gdb to use very large packets since we support any packet size supported << "PacketSize=1048576" ";QStartNoAckMode+" ";qXfer:features:read+" ";qXfer:auxv:read+" ";qXfer:exec-file:read+" ";qXfer:siginfo:read+" ";qXfer:siginfo:write+" ";multiprocess+" ";hwbreak+" ";swbreak+" ";ConditionalBreakpoints+" ";vContSupported+" ";QPassSignals+"; if (features().reverse_execution) { supported << ";ReverseContinue+" ";ReverseStep+"; } write_packet(supported.str().c_str()); return false; } if (!strcmp(name, "Symbol")) { #ifdef PROC_SERVICE_H LOG(debug) << "gdb is ready for symbol lookups"; const char* colon = strchr(args, ':'); parser_assert(colon != nullptr); req = GdbRequest(DREQ_QSYMBOL); if (*args == ':') { req.sym().has_address = false; } else { req.sym().has_address = true; req.sym().address = strtoul(args, &args, 16); } parser_assert(*args == ':'); ++args; req.sym().name = decode_ascii_encoded_hex_str(args); return true; #else LOG(debug) << "gdb is ready for symbol lookups, but we don't support them"; write_packet(""); return false; #endif } if (strstr(name, "ThreadExtraInfo") == name) { // ThreadExtraInfo is a special snowflake that // delimits its args with ','. parser_assert(!args); args = payload; args = 1 + strchr(args, ',' /*sic*/); req = GdbRequest(DREQ_GET_THREAD_EXTRA_INFO); req.target = parse_threadid(args, &args); parser_assert('\0' == *args); return true; } if (!strcmp(name, "TStatus")) { LOG(debug) << "gdb asks for trace status"; /* XXX from the docs, it appears that we should reply * with "T0" here. But if we do, gdb keeps bothering * us with trace queries. So pretend we don't know * what it's talking about. 
*/ write_packet(""); return false; } if (!strcmp(name, "Xfer")) { name = args; args = strchr(args, ':'); parser_assert(args); *args++ = '\0'; return xfer(name, args); } if (!strcmp(name, "Search")) { name = args; args = strchr(args, ':'); if (args) { *args++ = '\0'; } if (!strcmp(name, "memory") && args) { req = GdbRequest(DREQ_SEARCH_MEM); req.target = query_thread; req.mem().addr = strtoul(args, &args, 16); parser_assert(';' == *args++); req.mem().len = strtoull(args, &args, 16); parser_assert(';' == *args++); read_binary_data((const uint8_t*)args, inbuf.data() + packetend, req.mem().data); LOG(debug) << "gdb searching memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ")"; return true; } write_packet(""); return false; } UNHANDLED_REQ() << "Unhandled gdb query: q" << name; return false; } bool GdbConnection::set_var(char* payload) { const char* name; char* args; args = strchr(payload, ':'); if (args) { *args++ = '\0'; } name = payload; if (!strcmp(name, "StartNoAckMode")) { write_packet("OK"); no_ack = true; return false; } if (!strncmp(name, "PassSignals", sizeof("PassSignals"))) { pass_signals.clear(); while (*args != '\0') { char *next = nullptr; int sig = std::strtol(args, &next, 16); parser_assert(next != nullptr); LOG(debug) << "registered " << sig << " by QPassSignal"; pass_signals.insert(sig); args = next; if (*args == '\0') { break; } parser_assert(*args == ';'); args++; } write_packet("OK"); return false; } UNHANDLED_REQ() << "Unhandled gdb set: Q" << name; return false; } void GdbConnection::consume_request() { req = GdbRequest(); write_flush(); } bool GdbConnection::process_bpacket(char* payload) { if (strcmp(payload, "c") == 0) { req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_BACKWARD; req.cont().actions.push_back(GdbContAction(ACTION_CONTINUE, resume_thread)); return true; } else if (strcmp(payload, "s") == 0) { req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_BACKWARD; req.cont().actions.push_back(GdbContAction(ACTION_STEP, resume_thread)); return true; } else { UNHANDLED_REQ() << "Unhandled gdb bpacket: b" << payload; return false; } } static int gdb_open_flags_to_system_flags(int64_t flags) { int ret; switch (flags & 3) { case 0: ret = O_RDONLY; break; case 1: ret = O_WRONLY; break; case 2: ret = O_RDWR; break; default: parser_assert(false); return 0; } parser_assert(!(flags & ~int64_t(3 | 0x8 | 0x200 | 0x400 | 0x800))); if (flags & 0x8) { ret |= O_APPEND; } if (flags & 0x200) { ret |= O_CREAT; } if (flags & 0x400) { ret |= O_TRUNC; } if (flags & 0x800) { ret |= O_EXCL; } return ret; } bool GdbConnection::process_vpacket(char* payload) { const char* name; char* args; args = strchr(payload, ';'); if (args) { *args++ = '\0'; } name = payload; if (!strcmp("Cont", name)) { vector actions; bool has_default_action = false; GdbContAction default_action; while (args) { char* cmd = args; while (*args != ':' && *args != ';') { if (!*args) { args = nullptr; break; } ++args; } bool is_default = true; GdbThreadId target; if (args) { if (*args == ':') { is_default = false; *args = '\0'; target = parse_threadid(args + 1, &args); } args = strchr(args, ';'); if (args) { *args = '\0'; ++args; } } GdbActionType action; int signal_to_deliver = 0; char* endptr = NULL; switch (cmd[0]) { case 'C': action = ACTION_CONTINUE; signal_to_deliver = strtol(cmd + 1, &endptr, 16); break; case 'c': action = ACTION_CONTINUE; break; case 'S': action = ACTION_STEP; signal_to_deliver = strtol(cmd + 1, &cmd, 16); break; case 's': action = ACTION_STEP; break; 
default: UNHANDLED_REQ() << "Unhandled vCont command " << cmd << "(" << args << ")"; return false; } if (endptr && *endptr) { UNHANDLED_REQ() << "Unhandled vCont command parameters " << cmd; return false; } if (is_default) { if (has_default_action) { UNHANDLED_REQ() << "Unhandled vCont command with multiple default actions"; return false; } has_default_action = true; default_action = GdbContAction(action, GdbThreadId::ALL, signal_to_deliver); } else { actions.push_back(GdbContAction(action, target, signal_to_deliver)); } } if (has_default_action) { actions.push_back(default_action); } req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_FORWARD; req.cont().actions = std::move(actions); return true; } if (!strcmp("Cont?", name)) { LOG(debug) << "gdb queries which continue commands we support"; write_packet("vCont;c;C;s;S;"); return false; } if (!strcmp("Kill", name)) { // We can't kill tracees or replay can diverge. We // assume that this kill request is being made because // a "vRun" restart is coming right up. We know how // to implement vRun, so we'll ignore this one. LOG(debug) << "gdb asks us to kill tracee(s); ignoring"; write_packet("OK"); return false; } if (!strcmp("Run", name)) { req = GdbRequest(DREQ_RESTART); const char* filename = args; args = strchr(args, ';'); if (args) { *args++ = '\0'; } if (strlen(filename)) { FATAL() << "gdb wants us to run the exe image `" << filename << "', but we don't support that."; } if (!args) { req.restart().type = RESTART_FROM_PREVIOUS; return true; } const char* arg1 = args; args = strchr(args, ';'); if (args) { *args++ = 0; LOG(debug) << "Ignoring extra parameters " << args; } string event_str = decode_ascii_encoded_hex_str(arg1); char* endp; if (event_str[0] == 'c') { int64_t param = strtoll(event_str.c_str() + 1, &endp, 0); req.restart().type = RESTART_FROM_CHECKPOINT; req.restart().param_str = event_str.substr(1); req.restart().param = param; LOG(debug) << "next replayer restarting from checkpoint " << param; } else if (event_str[0] == 't') { int64_t param = strtoll(event_str.c_str() + 1, &endp, 0); req.restart().type = RESTART_FROM_TICKS; req.restart().param_str = event_str.substr(1); req.restart().param = param; LOG(debug) << "next replayer restarting from tick count " << param; } else { req.restart().type = RESTART_FROM_EVENT; req.restart().param = strtoll(event_str.c_str(), &endp, 0); LOG(debug) << "next replayer advancing to event " << req.restart().param; } if (!endp || *endp != '\0') { LOG(debug) << "Couldn't parse event string `" << event_str << "'" << "; restarting from previous"; req.restart().type = RESTART_FROM_PREVIOUS; req.restart().param = -1; } return true; } if (name == strstr(name, "File:")) { char* operation = payload + 5; if (operation == strstr(operation, "open:")) { char* file_name_end = strchr(operation + 5, ','); parser_assert(file_name_end != NULL); *file_name_end = 0; req = GdbRequest(DREQ_FILE_OPEN); req.file_open().file_name = decode_ascii_encoded_hex_str(operation + 5); char* flags_end; int64_t flags = strtol(file_name_end + 1, &flags_end, 16); parser_assert(*flags_end == ','); req.file_open().flags = gdb_open_flags_to_system_flags(flags); char* mode_end; int64_t mode = strtol(flags_end + 1, &mode_end, 16); parser_assert(*mode_end == 0); parser_assert((mode & ~(int64_t)0777) == 0); req.file_open().mode = mode; return true; } else if (operation == strstr(operation, "close:")) { char* endptr; int64_t fd = strtol(operation + 6, &endptr, 16); parser_assert(*endptr == 0); req = GdbRequest(DREQ_FILE_CLOSE); 
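// (e.g. a "vFile:close:7" packet reaches here with fd == 0x7; vFile // numeric fields are hex-encoded.)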
req.file_close().fd = fd; parser_assert(req.file_close().fd == fd); return true; } else if (operation == strstr(operation, "pread:")) { char* fd_end; int64_t fd = strtol(operation + 6, &fd_end, 16); parser_assert(*fd_end == ','); req = GdbRequest(DREQ_FILE_PREAD); req.file_pread().fd = fd; parser_assert(req.file_pread().fd == fd); char* size_end; int64_t size = strtol(fd_end + 1, &size_end, 16); parser_assert(*size_end == ','); parser_assert(size >= 0); req.file_pread().size = size; char* offset_end; int64_t offset = strtol(size_end + 1, &offset_end, 16); parser_assert(*offset_end == 0); parser_assert(offset >= 0); req.file_pread().offset = offset; return true; } else if (operation == strstr(operation, "setfs:")) { char* endptr; int64_t pid = strtol(operation + 6, &endptr, 16); parser_assert(*endptr == 0); req = GdbRequest(DREQ_FILE_SETFS); req.file_setfs().pid = pid; parser_assert(req.file_setfs().pid == pid); return true; } else { write_packet(""); return false; } } UNHANDLED_REQ() << "Unhandled gdb vpacket: v" << name; return false; } static string to_string(const vector& bytes, size_t max_len) { stringstream ss; for (size_t i = 0; i < bytes.size(); ++i) { if (i >= max_len) { ss << "..."; break; } char buf[3]; sprintf(buf, "%02x", bytes[i]); ss << buf; } return ss.str(); } bool GdbConnection::process_packet() { parser_assert( INTERRUPT_CHAR == inbuf[0] || ('$' == inbuf[0] && (uint8_t*)memchr(inbuf.data(), '#', inbuf.size()) == inbuf.data() + packetend)); if (INTERRUPT_CHAR == inbuf[0]) { LOG(debug) << "gdb requests interrupt"; req = GdbRequest(DREQ_INTERRUPT); inbuf.erase(inbuf.begin()); return true; } char request = inbuf[1]; char* payload = (char*)&inbuf[2]; inbuf[packetend] = '\0'; LOG(debug) << "raw request " << request << payload; bool ret; switch (request) { case 'b': ret = process_bpacket(payload); break; case 'c': LOG(debug) << "gdb is asking to continue"; req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_FORWARD; req.cont().actions.push_back(GdbContAction(ACTION_CONTINUE)); ret = true; break; case 'D': LOG(debug) << "gdb is detaching from us"; req = GdbRequest(DREQ_DETACH); ret = true; break; case 'g': req = GdbRequest(DREQ_GET_REGS); req.target = query_thread; LOG(debug) << "gdb requests registers"; ret = true; break; case 'G': /* XXX we can't let gdb spray registers in general, * because it may cause replay to diverge. But some * writes may be OK. Let's see how far we can get * with ignoring these requests. */ write_packet(""); ret = false; break; case 'H': if ('c' == *payload++) { req = GdbRequest(DREQ_SET_CONTINUE_THREAD); } else { req = GdbRequest(DREQ_SET_QUERY_THREAD); } req.target = parse_threadid(payload, &payload); parser_assert('\0' == *payload); LOG(debug) << "gdb selecting " << req.target; ret = true; break; case 'k': LOG(info) << "gdb requests kill, exiting"; write_packet("OK"); exit(0); case 'm': req = GdbRequest(DREQ_GET_MEM); req.target = query_thread; req.mem().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload++); req.mem().len = strtoul(payload, &payload, 16); parser_assert('\0' == *payload); LOG(debug) << "gdb requests memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ")"; ret = true; break; case 'M': /* We can't allow the debugger to write arbitrary data * to memory, or the replay may diverge. 
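* (For reference, an 'M' packet looks like "M10a0,2:90c3" -- hex address, * length, then hex-encoded data bytes; gdb normally sends the binary 'X' * form, handled below, when the stub supports it.)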
*/ // TODO: parse this packet in case some oddball gdb // decides to send it instead of 'X' write_packet(""); ret = false; break; case 'p': req = GdbRequest(DREQ_GET_REG); req.target = query_thread; req.reg().name = GdbRegister(strtoul(payload, &payload, 16)); parser_assert('\0' == *payload); LOG(debug) << "gdb requests register value (" << req.reg().name << ")"; ret = true; break; case 'P': req = GdbRequest(DREQ_SET_REG); req.target = query_thread; req.reg().name = GdbRegister(strtoul(payload, &payload, 16)); parser_assert('=' == *payload++); read_reg_value(&payload, &req.reg()); parser_assert('\0' == *payload); ret = true; break; case 'q': ret = query(payload); break; case 'Q': ret = set_var(payload); break; case 'T': req = GdbRequest(DREQ_GET_IS_THREAD_ALIVE); req.target = parse_threadid(payload, &payload); parser_assert('\0' == *payload); LOG(debug) << "gdb wants to know if " << req.target << " is alive"; ret = true; break; case 'v': ret = process_vpacket(payload); break; case 'X': { req = GdbRequest(DREQ_SET_MEM); req.target = query_thread; req.mem().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload++); req.mem().len = strtoul(payload, &payload, 16); parser_assert(':' == *payload++); read_binary_data((const uint8_t*)payload, inbuf.data() + packetend, req.mem().data); parser_assert(req.mem().len == req.mem().data.size()); LOG(debug) << "gdb setting memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ", data=" << to_string(req.mem().data, 32) << ")"; ret = true; break; } case 'z': case 'Z': { int type = strtol(payload, &payload, 16); parser_assert(',' == *payload++); if (!(0 <= type && type <= 4)) { LOG(warn) << "Unknown watch type " << type; write_packet(""); ret = false; break; } req = GdbRequest(GdbRequestType( type + (request == 'Z' ? DREQ_SET_SW_BREAK : DREQ_REMOVE_SW_BREAK))); req.watch().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload); payload++; req.watch().kind = strtoul(payload, &payload, 16); if (';' == *payload) { ++payload; while ('X' == *payload) { ++payload; int len = strtol(payload, &payload, 16); parser_assert(',' == *payload); payload++; vector bytes; for (int i = 0; i < len; ++i) { parser_assert(payload[0] && payload[1]); char tmp = payload[2]; payload[2] = '\0'; bytes.push_back(strtol(payload, &payload, 16)); parser_assert('\0' == *payload); payload[0] = tmp; } req.watch().conditions.push_back(std::move(bytes)); } } parser_assert('\0' == *payload); LOG(debug) << "gdb requests " << ('Z' == request ? "set" : "remove") << "breakpoint (addr=" << HEX(req.watch().addr) << ", len=" << req.watch().kind << ")"; ret = true; break; } case '!': LOG(debug) << "gdb requests extended mode"; write_packet("OK"); ret = false; break; case '?': LOG(debug) << "gdb requests stop reason"; req = GdbRequest(DREQ_GET_STOP_REASON); req.target = query_thread; ret = true; break; default: UNHANDLED_REQ() << "Unhandled gdb request '" << inbuf[1] << "'"; ret = false; } /* Erase the newly processed packet from the input buffer. The checksum * after the '#' will be skipped later as we look for the next packet start. */ inbuf.erase(inbuf.begin(), inbuf.begin() + packetend + 1); /* If we processed the request internally, consume it. */ if (!ret) { consume_request(); } return ret; } void GdbConnection::notify_no_such_thread(const GdbRequest& req) { DEBUG_ASSERT(req.target == this->req.target && req.type == this->req.type); /* '10' is the errno ECHILD. 
We use it as a magic code to * notify the user that the thread that was the target of this * request has died, and either gdb didn't notice that, or rr * didn't notify gdb. Either way, the user should restart * their debugging session. */ LOG(error) << "Targeted thread no longer exists; this is the result of " "either a gdb or\n" "rr bug. Please restart your debugging session and avoid " "doing whatever\n" "triggered this bug."; write_packet("E10"); consume_request(); } void GdbConnection::notify_restart() { DEBUG_ASSERT(DREQ_RESTART == req.type); // These threads may not exist at the first trace-stop after // restart. The gdb client should reset this state, but help // it out just in case. resume_thread = GdbThreadId::ANY; query_thread = GdbThreadId::ANY; req = GdbRequest(); } GdbRequest GdbConnection::get_request() { if (DREQ_RESTART == req.type) { LOG(debug) << "consuming RESTART request"; notify_restart(); // gdb wants to be notified with a stop packet when // the process "relaunches". In rr's case, the // tracee may be very far away from process creation, // but that's OK. req = GdbRequest(DREQ_GET_STOP_REASON); req.target = query_thread; return req; } /* Can't ask for the next request until you've satisfied the * current one, for requests that need an immediate * response. */ #ifdef DEBUG DEBUG_ASSERT(!request_needs_immediate_response(&req)); #endif if (!sniff_packet() && req.is_resume_request()) { /* There's no new request data available and gdb has * already asked us to resume. OK, do that (or keep * doing that) now. */ return req; } while (true) { /* There's either new request data, or we have nothing * to do. Either way, block until we read a complete * packet from gdb. */ read_packet(); if (!connection_alive_) { return req = GdbRequest(DREQ_DETACH); } if (process_packet()) { /* We couldn't process the packet internally, * so the target has to do something. */ return req; } /* The packet we got was "internal", gdb details. * Nothing for the target to do yet. Keep waiting. */ } } void GdbConnection::notify_exit_code(int code) { char buf[64]; DEBUG_ASSERT(req.is_resume_request() || req.type == DREQ_INTERRUPT); snprintf(buf, sizeof(buf) - 1, "W%02x", code); write_packet(buf); consume_request(); } void GdbConnection::notify_exit_signal(int sig) { char buf[64]; DEBUG_ASSERT(req.is_resume_request() || req.type == DREQ_INTERRUPT); snprintf(buf, sizeof(buf) - 1, "X%02x", sig); write_packet(buf); consume_request(); } /** * Translate linux-x86 |sig| to gdb's internal numbering. Translation * made according to gdb/include/gdb/signals.def. */ static int to_gdb_signum(int sig) { switch (sig) { case 0: return 0; case SIGHUP: return 1; case SIGINT: return 2; case SIGQUIT: return 3; case SIGILL: return 4; case SIGTRAP: return 5; case SIGABRT /*case SIGIOT*/: return 6; case SIGBUS: return 10; case SIGFPE: return 8; case SIGKILL: return 9; case SIGUSR1: return 30; case SIGSEGV: return 11; case SIGUSR2: return 31; case SIGPIPE: return 13; case SIGALRM: return 14; case SIGTERM: return 15; /* gdb hasn't heard of SIGSTKFLT, so this is * arbitrarily made up.
SIGDANGER just sounds cool.*/ case SIGSTKFLT: return 38 /*GDB_SIGNAL_DANGER*/; /*case SIGCLD*/ case SIGCHLD: return 20; case SIGCONT: return 19; case SIGSTOP: return 17; case SIGTSTP: return 18; case SIGTTIN: return 21; case SIGTTOU: return 22; case SIGURG: return 16; case SIGXCPU: return 24; case SIGXFSZ: return 25; case SIGVTALRM: return 26; case SIGPROF: return 27; case SIGWINCH: return 28; /*case SIGPOLL*/ case SIGIO: return 23; case SIGPWR: return 32; case SIGSYS: return 12; case 32: return 77; default: if (33 <= sig && sig <= 63) { /* GDB_SIGNAL_REALTIME_33 is numbered 45, hence this offset. */ return sig + 12; } if (64 <= sig && sig <= 127) { /* GDB_SIGNAL_REALTIME_64 is numbered 78, hence this offset. */ return sig + 14; } LOG(warn) << "Unknown signal " << sig; return 143; // GDB_SIGNAL_UNKNOWN } } void GdbConnection::send_stop_reply_packet(GdbThreadId thread, int sig, const char *reason) { if (sig < 0) { write_packet("E01"); return; } char buf[PATH_MAX]; if (multiprocess_supported_) { snprintf(buf, sizeof(buf) - 1, "T%02xthread:p%02x.%02x;%s", to_gdb_signum(sig), thread.pid, thread.tid, reason); } else { snprintf(buf, sizeof(buf) - 1, "T%02xthread:%02x;%s", to_gdb_signum(sig), thread.tid, reason); } write_packet(buf); } void GdbConnection::notify_stop(GdbThreadId thread, int sig, const char *reason) { DEBUG_ASSERT(req.is_resume_request() || req.type == DREQ_INTERRUPT); // don't pass this signal to gdb if it is specified not to if (pass_signals.find(to_gdb_signum(sig)) != pass_signals.end()) { LOG(debug) << "discarding stop notification for signal " << sig << " on thread " << thread << " as specified by QPassSignal"; return; } if (tgid != thread.pid) { LOG(debug) << "ignoring stop of " << thread << " because we're debugging tgid " << tgid; // Re-use the existing continue request to advance to // the next stop we're willing to tell gdb about. return; } if (!reason) { reason = ""; } send_stop_reply_packet(thread, sig, reason); // This isn't documented in the gdb remote protocol, but if we // don't do this, gdb will sometimes continue to send requests // for the previously-stopped thread when it obviously intends // to be making requests about the stopped thread. // To make things even better, gdb expects different behavior // for forward continue/interrupt and reverse continue. if (req.is_resume_request() && req.cont().run_direction == RUN_BACKWARD) { LOG(debug) << "Setting query/resume_thread to ANY after reverse continue"; query_thread = resume_thread = GdbThreadId::ANY; } else { LOG(debug) << "Setting query/resume_thread to " << thread << " after forward continue or interrupt"; query_thread = resume_thread = thread; } consume_request(); } void GdbConnection::notify_restart_failed() { DEBUG_ASSERT(DREQ_RESTART == req.type); // TODO: it's not known by this author whether gdb knows how // to recover from a failed "run" request. 
write_packet("E01"); consume_request(); } void GdbConnection::reply_get_current_thread(GdbThreadId thread) { DEBUG_ASSERT(DREQ_GET_CURRENT_THREAD == req.type); char buf[1024]; if (multiprocess_supported_) { snprintf(buf, sizeof(buf), "QCp%02x.%02x", thread.pid, thread.tid); } else { snprintf(buf, sizeof(buf), "QC%02x", thread.tid); } write_packet(buf); consume_request(); } void GdbConnection::reply_get_auxv(const vector& auxv) { DEBUG_ASSERT(DREQ_GET_AUXV == req.type); if (!auxv.empty()) { write_binary_packet("l", auxv.data(), auxv.size()); } else { write_packet("E01"); } consume_request(); } void GdbConnection::reply_get_exec_file(const string& exec_file) { DEBUG_ASSERT(DREQ_GET_EXEC_FILE == req.type); if (!exec_file.empty()) { write_binary_packet("l", reinterpret_cast(exec_file.c_str()), exec_file.size()); } else { write_packet("E01"); } consume_request(); } void GdbConnection::reply_get_is_thread_alive(bool alive) { DEBUG_ASSERT(DREQ_GET_IS_THREAD_ALIVE == req.type); write_packet(alive ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_get_thread_extra_info(const string& info) { DEBUG_ASSERT(DREQ_GET_THREAD_EXTRA_INFO == req.type); LOG(debug) << "thread extra info: '" << info.c_str() << "'"; write_hex_bytes_packet((const uint8_t*)info.c_str(), 1 + info.length()); consume_request(); } void GdbConnection::reply_select_thread(bool ok) { DEBUG_ASSERT(DREQ_SET_CONTINUE_THREAD == req.type || DREQ_SET_QUERY_THREAD == req.type); if (ok && DREQ_SET_CONTINUE_THREAD == req.type) { resume_thread = req.target; } else if (ok && DREQ_SET_QUERY_THREAD == req.type) { query_thread = req.target; } write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_get_mem(const vector& mem) { DEBUG_ASSERT(DREQ_GET_MEM == req.type); DEBUG_ASSERT(mem.size() <= req.mem().len); if (req.mem().len > 0 && mem.size() == 0) { write_packet("E01"); } else { write_hex_bytes_packet(mem.data(), mem.size()); } consume_request(); } void GdbConnection::reply_set_mem(bool ok) { DEBUG_ASSERT(DREQ_SET_MEM == req.type); write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_search_mem(bool found, remote_ptr addr) { DEBUG_ASSERT(DREQ_SEARCH_MEM == req.type); if (found) { char buf[256]; sprintf(buf, "1,%llx", (long long)addr.as_int()); write_packet(buf); } else { write_packet("0"); } consume_request(); } void GdbConnection::reply_get_offsets(/* TODO */) { DEBUG_ASSERT(DREQ_GET_OFFSETS == req.type); /* XXX FIXME TODO */ write_packet(""); consume_request(); } void GdbConnection::reply_get_reg(const GdbRegisterValue& reg) { char buf[2 * GdbRegisterValue::MAX_SIZE + 1]; DEBUG_ASSERT(DREQ_GET_REG == req.type); print_reg_value(reg, buf); write_packet(buf); consume_request(); } void GdbConnection::reply_get_regs(const vector& file) { std::unique_ptr buf( new char[file.size() * 2 * GdbRegisterValue::MAX_SIZE + 1]); DEBUG_ASSERT(DREQ_GET_REGS == req.type); size_t offset = 0; for (auto& reg : file) { offset += print_reg_value(reg, &buf[offset]); } write_packet(buf.get()); consume_request(); } void GdbConnection::reply_set_reg(bool ok) { DEBUG_ASSERT(DREQ_SET_REG == req.type); // TODO: what happens if we're forced to reply to a // set-register request with |ok = false|, leading us to // pretend not to understand the packet? If, later, an // experimental session needs the set-register request will it // not be sent? // // We can't reply with an error packet here because gdb thinks // that failed set-register requests are catastrophic. write_packet(ok ? 
"OK" : ""); consume_request(); } void GdbConnection::reply_get_stop_reason(GdbThreadId which, int sig) { DEBUG_ASSERT(DREQ_GET_STOP_REASON == req.type); send_stop_reply_packet(which, sig, ""); consume_request(); } void GdbConnection::reply_get_thread_list(const vector& threads) { DEBUG_ASSERT(DREQ_GET_THREAD_LIST == req.type); if (threads.empty()) { write_packet("l"); } else { stringstream sstr; sstr << 'm'; for (size_t i = 0; i < threads.size(); ++i) { const GdbThreadId& t = threads[i]; if (tgid != t.pid) { continue; } if (multiprocess_supported_) { sstr << 'p' << setw(2) << setfill('0') << hex << t.pid << dec << '.' << setw(2) << setfill('0') << hex << t.tid << ','; } else { sstr << setw(2) << setfill('0') << hex << t.tid << ','; } } string str = sstr.str(); /* Overwrite the trailing ',' */ str.back() = 0; write_packet(str.c_str()); } consume_request(); } void GdbConnection::reply_watchpoint_request(bool ok) { DEBUG_ASSERT(DREQ_WATCH_FIRST <= req.type && req.type <= DREQ_WATCH_LAST); write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_detach() { DEBUG_ASSERT(DREQ_DETACH <= req.type); write_packet("OK"); consume_request(); } void GdbConnection::reply_read_siginfo(const vector& si_bytes) { DEBUG_ASSERT(DREQ_READ_SIGINFO == req.type); if (si_bytes.empty()) { write_packet("E01"); } else { write_binary_packet("l", si_bytes.data(), si_bytes.size()); } consume_request(); } void GdbConnection::reply_write_siginfo(/* TODO*/) { DEBUG_ASSERT(DREQ_WRITE_SIGINFO == req.type); write_packet("E01"); consume_request(); } void GdbConnection::reply_rr_cmd(const std::string& text) { DEBUG_ASSERT(DREQ_RR_CMD == req.type); write_packet(text.c_str()); consume_request(); } void GdbConnection::send_qsymbol(const std::string& name) { DEBUG_ASSERT(DREQ_QSYMBOL == req.type); const void* data = static_cast(name.c_str()); write_hex_bytes_packet("qSymbol:", static_cast(data), name.length()); consume_request(); } void GdbConnection::qsymbols_finished() { DEBUG_ASSERT(DREQ_QSYMBOL == req.type); write_packet("OK"); consume_request(); } void GdbConnection::reply_tls_addr(bool ok, remote_ptr address) { DEBUG_ASSERT(DREQ_TLS == req.type); if (ok) { char buf[256]; sprintf(buf, "%llx", (long long)address.as_int()); write_packet(buf); } else { write_packet("E01"); } consume_request(); } void GdbConnection::reply_setfs(int err) { DEBUG_ASSERT(DREQ_FILE_SETFS == req.type); if (err) { send_file_error_reply(err); } else { write_packet("F0"); } consume_request(); } void GdbConnection::reply_open(int fd, int err) { DEBUG_ASSERT(DREQ_FILE_OPEN == req.type); if (err) { send_file_error_reply(err); } else { char buf[32]; sprintf(buf, "F%x", fd); write_packet(buf); } consume_request(); } void GdbConnection::reply_pread(const uint8_t* bytes, ssize_t len, int err) { DEBUG_ASSERT(DREQ_FILE_PREAD == req.type); if (err) { send_file_error_reply(err); } else { char buf[32]; sprintf(buf, "F%llx;", (long long)len); write_binary_packet(buf, bytes, len); } consume_request(); } void GdbConnection::reply_close(int err) { DEBUG_ASSERT(DREQ_FILE_CLOSE == req.type); if (err) { send_file_error_reply(err); } else { write_packet("F0"); } consume_request(); } void GdbConnection::send_file_error_reply(int system_errno) { int gdb_err; switch (system_errno) { case EPERM: gdb_err = 1; break; case ENOENT: gdb_err = 2; break; case EINTR: gdb_err = 4; break; case EBADF: gdb_err = 9; break; case EACCES: gdb_err = 13; break; case EFAULT: gdb_err = 14; break; case EBUSY: gdb_err = 16; break; case EEXIST: gdb_err = 17; break; case ENODEV: 
gdb_err = 19; break; case ENOTDIR: gdb_err = 20; break; case EISDIR: gdb_err = 21; break; case EINVAL: gdb_err = 22; break; case ENFILE: gdb_err = 23; break; case EMFILE: gdb_err = 24; break; case EFBIG: gdb_err = 27; break; case ENOSPC: gdb_err = 28; break; case ESPIPE: gdb_err = 29; break; case EROFS: gdb_err = 30; break; case ENAMETOOLONG: gdb_err = 91; break; default: gdb_err = 9999; break; } char buf[32]; sprintf(buf, "F-01,%x", gdb_err); write_packet(buf); } bool GdbConnection::is_connection_alive() { return connection_alive_; } bool GdbConnection::is_pass_signal(int sig) { return pass_signals.find(to_gdb_signum(sig)) != pass_signals.end(); } } // namespace rr rr-5.7.0/src/GdbConnection.h000066400000000000000000000455671450675474200156610ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_CONNECTION_H_ #define RR_GDB_CONNECTION_H_ #include #include #include #include #include #include #include #include "GdbRegister.h" #include "Registers.h" #include "ReplaySession.h" #include "ReplayTimeline.h" #include "core.h" namespace rr { /** * Descriptor for task. Note: on linux, we can uniquely identify any thread * by its |tid| (in rr's pid namespace). */ struct GdbThreadId { GdbThreadId(pid_t pid = -1, pid_t tid = -1) : pid(pid), tid(tid) {} pid_t pid; pid_t tid; bool operator==(const GdbThreadId& o) const { return pid == o.pid && tid == o.tid; } static const GdbThreadId ANY; static const GdbThreadId ALL; }; inline std::ostream& operator<<(std::ostream& o, const GdbThreadId& t) { o << t.pid << "." << t.tid; return o; } /** * Represents a possibly-undefined register |name|. |size| indicates how * many bytes of |value| are valid, if any. */ struct GdbRegisterValue { enum { MAX_SIZE = Registers::MAX_SIZE }; GdbRegister name; union { uint8_t value[MAX_SIZE]; uint8_t value1; uint16_t value2; uint32_t value4; uint64_t value8; }; size_t size; bool defined; }; enum GdbRequestType { DREQ_NONE = 0, /* None of these requests have parameters. */ DREQ_GET_CURRENT_THREAD, DREQ_GET_OFFSETS, DREQ_GET_REGS, DREQ_GET_STOP_REASON, DREQ_GET_THREAD_LIST, DREQ_INTERRUPT, DREQ_DETACH, /* These use params.target. */ DREQ_GET_AUXV, DREQ_GET_EXEC_FILE, DREQ_GET_IS_THREAD_ALIVE, DREQ_GET_THREAD_EXTRA_INFO, DREQ_SET_CONTINUE_THREAD, DREQ_SET_QUERY_THREAD, // TLS lookup, uses params.target and params.tls. DREQ_TLS, // gdb wants to write back siginfo_t to a tracee. More // importantly, this packet arrives before an experiment // session for a |call foo()| is about to be torn down. // // TODO: actual interface NYI. DREQ_WRITE_SIGINFO, /* These use params.mem. */ DREQ_GET_MEM, DREQ_SET_MEM, // gdb wants to read the current siginfo_t for a stopped // tracee. More importantly, this packet arrives at the very // beginning of a |call foo()| experiment. // // Uses .mem for offset/len. DREQ_READ_SIGINFO, DREQ_SEARCH_MEM, DREQ_MEM_FIRST = DREQ_GET_MEM, DREQ_MEM_LAST = DREQ_SEARCH_MEM, DREQ_REMOVE_SW_BREAK, DREQ_REMOVE_HW_BREAK, DREQ_REMOVE_WR_WATCH, DREQ_REMOVE_RD_WATCH, DREQ_REMOVE_RDWR_WATCH, DREQ_SET_SW_BREAK, DREQ_SET_HW_BREAK, DREQ_SET_WR_WATCH, DREQ_SET_RD_WATCH, DREQ_SET_RDWR_WATCH, DREQ_WATCH_FIRST = DREQ_REMOVE_SW_BREAK, DREQ_WATCH_LAST = DREQ_SET_RDWR_WATCH, /* Use params.reg. */ DREQ_GET_REG, DREQ_SET_REG, DREQ_REG_FIRST = DREQ_GET_REG, DREQ_REG_LAST = DREQ_SET_REG, /* Use params.cont. */ DREQ_CONT, /* gdb host detaching from stub. No parameters. */ /* Uses params.restart. */ DREQ_RESTART, /* Uses params.text. 
*/ DREQ_RR_CMD, // qSymbol packet, uses params.sym. DREQ_QSYMBOL, // vFile:setfs packet, uses params.file_setfs. DREQ_FILE_SETFS, // vFile:open packet, uses params.file_open. DREQ_FILE_OPEN, // vFile:pread packet, uses params.file_pread. DREQ_FILE_PREAD, // vFile:close packet, uses params.file_close. DREQ_FILE_CLOSE, }; enum GdbRestartType { RESTART_FROM_PREVIOUS, RESTART_FROM_EVENT, RESTART_FROM_CHECKPOINT, RESTART_FROM_TICKS }; enum GdbActionType { ACTION_CONTINUE, ACTION_STEP }; struct GdbContAction { GdbContAction(GdbActionType type = ACTION_CONTINUE, const GdbThreadId& target = GdbThreadId::ANY, int signal_to_deliver = 0) : type(type), target(target), signal_to_deliver(signal_to_deliver) {} GdbActionType type; GdbThreadId target; int signal_to_deliver; }; /** * These requests are made by the debugger host and honored in proxy * by rr, the target. */ struct GdbRequest { GdbRequest(GdbRequestType type = DREQ_NONE) : type(type), suppress_debugger_stop(false) {} GdbRequest(const GdbRequest& other) : type(other.type), target(other.target), suppress_debugger_stop(other.suppress_debugger_stop), mem_(other.mem_), watch_(other.watch_), reg_(other.reg_), restart_(other.restart_), cont_(other.cont_), text_(other.text_), tls_(other.tls_), sym_(other.sym_), file_setfs_(other.file_setfs_), file_open_(other.file_open_), file_pread_(other.file_pread_), file_close_(other.file_close_) {} GdbRequest& operator=(const GdbRequest& other) { this->~GdbRequest(); new (this) GdbRequest(other); return *this; } const GdbRequestType type; GdbThreadId target; bool suppress_debugger_stop; struct Mem { uintptr_t addr; size_t len; // For SET_MEM requests, the |len| raw bytes that are to be written. // For SEARCH_MEM requests, the bytes to search for. std::vector data; } mem_; struct Watch { uintptr_t addr; int kind; std::vector> conditions; } watch_; GdbRegisterValue reg_; struct Restart { int64_t param; std::string param_str; GdbRestartType type; } restart_; struct Cont { RunDirection run_direction; std::vector actions; } cont_; std::string text_; struct Tls { size_t offset; remote_ptr load_module; } tls_; struct Symbol { bool has_address; remote_ptr address; std::string name; } sym_; struct FileSetfs { pid_t pid; } file_setfs_; struct FileOpen { std::string file_name; // In system format, not gdb's format int flags; int mode; } file_open_; struct FilePread { int fd; size_t size; uint64_t offset; } file_pread_; struct FileClose { int fd; } file_close_; Mem& mem() { DEBUG_ASSERT(type >= DREQ_MEM_FIRST && type <= DREQ_MEM_LAST); return mem_; } const Mem& mem() const { DEBUG_ASSERT(type >= DREQ_MEM_FIRST && type <= DREQ_MEM_LAST); return mem_; } Watch& watch() { DEBUG_ASSERT(type >= DREQ_WATCH_FIRST && type <= DREQ_WATCH_LAST); return watch_; } const Watch& watch() const { DEBUG_ASSERT(type >= DREQ_WATCH_FIRST && type <= DREQ_WATCH_LAST); return watch_; } GdbRegisterValue& reg() { DEBUG_ASSERT(type >= DREQ_REG_FIRST && type <= DREQ_REG_LAST); return reg_; } const GdbRegisterValue& reg() const { DEBUG_ASSERT(type >= DREQ_REG_FIRST && type <= DREQ_REG_LAST); return reg_; } Restart& restart() { DEBUG_ASSERT(type == DREQ_RESTART); return restart_; } const Restart& restart() const { DEBUG_ASSERT(type == DREQ_RESTART); return restart_; } Cont& cont() { DEBUG_ASSERT(type == DREQ_CONT); return cont_; } const Cont& cont() const { DEBUG_ASSERT(type == DREQ_CONT); return cont_; } const std::string& text() const { DEBUG_ASSERT(type == DREQ_RR_CMD); return text_; } Tls& tls() { DEBUG_ASSERT(type == DREQ_TLS); return tls_; } const 
Tls& tls() const { DEBUG_ASSERT(type == DREQ_TLS); return tls_; } Symbol& sym() { DEBUG_ASSERT(type == DREQ_QSYMBOL); return sym_; } const Symbol& sym() const { DEBUG_ASSERT(type == DREQ_QSYMBOL); return sym_; } FileSetfs& file_setfs() { DEBUG_ASSERT(type == DREQ_FILE_SETFS); return file_setfs_; } const FileSetfs& file_setfs() const { DEBUG_ASSERT(type == DREQ_FILE_SETFS); return file_setfs_; } FileOpen& file_open() { DEBUG_ASSERT(type == DREQ_FILE_OPEN); return file_open_; } const FileOpen& file_open() const { DEBUG_ASSERT(type == DREQ_FILE_OPEN); return file_open_; } FilePread& file_pread() { DEBUG_ASSERT(type == DREQ_FILE_PREAD); return file_pread_; } const FilePread& file_pread() const { DEBUG_ASSERT(type == DREQ_FILE_PREAD); return file_pread_; } FileClose& file_close() { DEBUG_ASSERT(type == DREQ_FILE_CLOSE); return file_close_; } const FileClose& file_close() const { DEBUG_ASSERT(type == DREQ_FILE_CLOSE); return file_close_; } /** * Return nonzero if this requires that program execution be resumed * in some way. */ bool is_resume_request() const { return type == DREQ_CONT; } }; /** * This struct wraps up the state of the gdb protocol, so that we can * offer a (mostly) stateless interface to clients. */ class GdbConnection { public: struct Features { Features() : reverse_execution(true) {} bool reverse_execution; }; /** * Call this when the target of |req| is needed to fulfill the * request, but the target is dead. This situation is a symptom of a * gdb or rr bug. */ void notify_no_such_thread(const GdbRequest& req); /** * Finish a DREQ_RESTART request. Should be invoked after replay * restarts and prior GdbConnection has been restored. */ void notify_restart(); /** * Return the current request made by the debugger host, that needs to * be satisfied. This function will block until either there's a * debugger host request that needs a response, or until a request is * made to resume execution of the target. In the latter case, * calling this function multiple times will return an appropriate * resume request each time (see above). * * The target should peek at the debugger request in between execution * steps. A new request may need to be serviced. */ GdbRequest get_request(); /** * Notify the host that this process has exited with |code|. */ void notify_exit_code(int code); /** * Notify the host that this process has exited from |sig|. */ void notify_exit_signal(int sig); /** * Notify the host that a resume request has "finished", i.e., the * target has stopped executing for some reason. |sig| is the signal * that stopped execution, or 0 if execution stopped otherwise. */ void notify_stop(GdbThreadId which, int sig, const char *reason=nullptr); /** Notify the debugger that a restart request failed. */ void notify_restart_failed(); /** * Tell the host that |thread| is the current thread. */ void reply_get_current_thread(GdbThreadId thread); /** * Reply with the target thread's |auxv| pairs. |auxv.empty()| * if there was an error reading the auxiliary vector. */ void reply_get_auxv(const std::vector& auxv); /** * Reply with the target thread's executable file name */ void reply_get_exec_file(const std::string& exec_file); /** * |alive| is true if the requested thread is alive, false if dead. */ void reply_get_is_thread_alive(bool alive); /** * |info| is a string containing data about the request target that * might be relevant to the debugger user. */ void reply_get_thread_extra_info(const std::string& info); /** * |ok| is true if req->target can be selected, false otherwise. 
*/ void reply_select_thread(bool ok); /** * The first |mem.size()| bytes of the request were read into |mem|. * |mem.size()| must be less than or equal to the length of the request. */ void reply_get_mem(const std::vector& mem); /** * |ok| is true if a SET_MEM request succeeded, false otherwise. This * function *must* be called whenever a SET_MEM request is made, * regardless of success/failure or special interpretation. */ void reply_set_mem(bool ok); /** * Reply to the DREQ_SEARCH_MEM request. * |found| is true if we found the searched-for bytes starting at address * |addr|. */ void reply_search_mem(bool found, remote_ptr addr); /** * Reply to the DREQ_GET_OFFSETS request. */ void reply_get_offsets(/* TODO */); /** * Send |value| back to the debugger host. |value| may be undefined. */ void reply_get_reg(const GdbRegisterValue& value); /** * Send |file| back to the debugger host. |file| may contain * undefined register values. */ void reply_get_regs(const std::vector& file); /** * Pass |ok = true| iff the requested register was successfully set. */ void reply_set_reg(bool ok); /** * Reply to the DREQ_GET_STOP_REASON request. */ void reply_get_stop_reason(GdbThreadId which, int sig); /** * |threads| contains the list of live threads, of which there are * |len|. */ void reply_get_thread_list(const std::vector& threads); /** * |ok| is true if the request was successfully applied, false if * not. */ void reply_watchpoint_request(bool ok); /** * DREQ_DETACH was processed. * * There's no functional reason to reply to the detach request. * However, some versions of gdb expect a response and time out * awaiting it, wasting developer time. */ void reply_detach(); /** * Pass the siginfo_t and its size (as requested by the debugger) in * |si_bytes| and |num_bytes| if successfully read. Otherwise pass * |si_bytes = nullptr|. */ void reply_read_siginfo(const std::vector& si_bytes); /** * Not yet implemented, but call this after a WRITE_SIGINFO request * anyway. */ void reply_write_siginfo(/* TODO*/); /** * Send a manual text response to a rr cmd (maintenance) packet. */ void reply_rr_cmd(const std::string& text); /** * Send a qSymbol response to gdb, requesting the address of the * symbol |name|. */ void send_qsymbol(const std::string& name); /** * The "all done" response to a qSymbol packet from gdb. */ void qsymbols_finished(); /** * Respond to a qGetTLSAddr packet. If |ok| is true, then respond * with |address|. If |ok| is false, respond with an error. */ void reply_tls_addr(bool ok, remote_ptr address); /** * Respond to a vFile:setfs */ void reply_setfs(int err); /** * Respond to a vFile:open */ void reply_open(int fd, int err); /** * Respond to a vFile:pread */ void reply_pread(const uint8_t* bytes, ssize_t len, int err); /** * Respond to a vFile:close */ void reply_close(int err); /** * Create a checkpoint of the given Session with the given id. Delete the * existing checkpoint with that id if there is one. */ void created_checkpoint(ReplaySession::shr_ptr& checkpoint, int checkpoint_id); /** * Delete the checkpoint with the given id. Silently fail if the checkpoint * does not exist. */ void delete_checkpoint(int checkpoint_id); /** * Get the checkpoint with the given id. Return null if not found. */ ReplaySession::shr_ptr get_checkpoint(int checkpoint_id); /** * Return true if there's a new packet to be read/process (whether * incomplete or not), and false if there isn't one. 
*/ bool sniff_packet(); const Features& features() { return features_; } enum { CPU_X86_64 = 0x1, CPU_AVX = 0x2, CPU_AARCH64 = 0x4, CPU_PKU = 0x8 }; void set_cpu_features(uint32_t features) { cpu_features_ = features; } uint32_t cpu_features() const { return cpu_features_; } GdbConnection(pid_t tgid, const Features& features); /** * Wait for a debugger client to connect to |dbg|'s socket. Blocks * indefinitely. */ void await_debugger(ScopedFd& listen_fd); /** * Returns false if the connection has been closed */ bool is_connection_alive(); bool hwbreak_supported() { return hwbreak_supported_; } bool swbreak_supported() { return swbreak_supported_; } bool is_pass_signal(int sig); private: /** * read() incoming data exactly one time, successfully. May block. */ void read_data_once(); /** * Send all pending output to gdb. May block. */ void write_flush(); void write_data_raw(const uint8_t* data, ssize_t len); void write_hex(unsigned long hex); void write_packet_bytes(const uint8_t* data, size_t num_bytes); void write_packet(const char* data); void write_binary_packet(const char* pfx, const uint8_t* data, ssize_t num_bytes); void write_hex_bytes_packet(const char* prefix, const uint8_t* bytes, size_t len); void write_hex_bytes_packet(const uint8_t* bytes, size_t len); void write_xfer_response(const void* data, size_t size, uint64_t offset, uint64_t len); /** * Consume bytes in the input buffer until start-of-packet ('$') or * the interrupt character is seen. Does not block. Return true if * seen, false if not. */ bool skip_to_packet_start(); /** * Block until the sequence of bytes * * "[^$]*\$[^#]*#.*" * * has been read from the client fd. This is one (or more) gdb * packet(s). */ void read_packet(); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool xfer(const char* name, char* args); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool query(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool set_var(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_vpacket(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_bpacket(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_packet(); void consume_request(); void send_stop_reply_packet(GdbThreadId thread, int sig, const char *reason); void send_file_error_reply(int system_errno); // Current request to be processed. GdbRequest req; // Thread to be resumed. GdbThreadId resume_thread; // Thread for get/set requests. GdbThreadId query_thread; // gdb and rr don't work well together in multi-process and // multi-exe-image debugging scenarios, so we pretend only // this thread group exists when interfacing with gdb pid_t tgid; uint32_t cpu_features_; // true when "no-ack mode" enabled, in which we don't have // to send ack packets back to gdb. This is a huge perf win. 
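// (Negotiated via the "QStartNoAckMode" packet, handled in set_var().)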
bool no_ack; // contains signals (gdb not native) which should be passed directly to the // debuggee without gdb being informed, speeding up // reverse execution std::unordered_set pass_signals; ScopedFd sock_fd; std::vector inbuf; /* buffered input from gdb */ size_t packetend; /* index of '#' character */ std::vector outbuf; /* buffered output for gdb */ Features features_; bool connection_alive_; bool multiprocess_supported_; // client supports multiprocess extension bool hwbreak_supported_; // client supports hwbreak extension bool swbreak_supported_; // client supports swbreak extension }; } // namespace rr #endif /* RR_GDB_CONNECTION_H_ */ rr-5.7.0/src/GdbExpression.cc000066400000000000000000000263031450675474200160420ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "GdbExpression.h" #include "GdbServer.h" #include "Task.h" #include "core.h" using namespace std; namespace rr { #define WORKAROUND_GDB_BUGS // Extracted from // https://sourceware.org/gdb/current/onlinedocs/gdb/Bytecode-Descriptions.html enum Opcode { OP_float = 0x01, OP_add = 0x02, OP_sub = 0x03, OP_mul = 0x04, OP_div_signed = 0x05, OP_div_unsigned = 0x06, OP_rem_signed = 0x07, OP_rem_unsigned = 0x08, OP_lsh = 0x09, OP_rsh_signed = 0x0a, OP_rsh_unsigned = 0x0b, OP_trace = 0x0c, OP_trace_quick = 0x0d, OP_log_not = 0x0e, OP_bit_and = 0x0f, OP_bit_or = 0x10, OP_bit_xor = 0x11, OP_bit_not = 0x12, OP_equal = 0x13, OP_less_signed = 0x14, OP_less_unsigned = 0x15, OP_ext = 0x16, OP_ref8 = 0x17, OP_ref16 = 0x18, OP_ref32 = 0x19, OP_ref64 = 0x1a, OP_ref_float = 0x1b, OP_ref_double = 0x1c, OP_ref_long_double = 0x1d, OP_l_to_d = 0x1e, OP_d_to_l = 0x1f, OP_if_goto = 0x20, OP_goto = 0x21, OP_const8 = 0x22, OP_const16 = 0x23, OP_const32 = 0x24, OP_const64 = 0x25, OP_reg = 0x26, OP_end = 0x27, OP_dup = 0x28, OP_pop = 0x29, OP_zero_ext = 0x2a, OP_swap = 0x2b, OP_getv = 0x2c, OP_setv = 0x2d, OP_tracev = 0x2e, OP_tracenz = 0x2f, OP_trace16 = 0x30, OP_pick = 0x32, OP_rot = 0x33, OP_printf = 0x34, }; struct ExpressionState { typedef GdbExpression::Value Value; ExpressionState(const vector& bytecode) : bytecode(bytecode), pc(0), error(false), end(false) {} void set_error() { error = true; } // Methods set error to true if there's an error and return some sentinel // Value. Value pop() { if (stack.empty()) { set_error(); return Value(-1); } Value v = stack.back(); stack.pop_back(); return v; } struct BinaryOperands { BinaryOperands(int64_t a = 0, int64_t b = 0) : a(a), b(b) {} int64_t a; int64_t b; }; BinaryOperands pop_a_b() { int64_t b = pop().i; return BinaryOperands(pop().i, b); } int64_t nonzero(int64_t v) { if (!v) { set_error(); return 1; } return v; } int64_t pop_a() { return pop().i; } void push(int64_t i) { stack.push_back(Value(i)); } template T fetch() { if (pc + sizeof(T) > bytecode.size()) { set_error(); return T(-1); } T v = 0; for (size_t i = 0; i < sizeof(T); ++i) { v = (v << 8) | bytecode[pc + i]; } pc += sizeof(T); return v; } template void load(Task* t) { uint64_t addr = pop().i; if (error) { // Don't do unnecessary syscalls if we're already in an error state. 
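// (ExpressionState::error is sticky: GdbExpression::evaluate() checks it
// after every step and bails out, so nothing downstream runs once it is
// set.)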
return; } bool ok = true; T v = t->read_mem(remote_ptr(addr), &ok); if (!ok) { set_error(); return; } push(v); } void pick(size_t offset) { if (offset >= stack.size()) { set_error(); return; } push(stack[stack.size() - 1 - offset].i); } void step(Task* t) { DEBUG_ASSERT(!error); BinaryOperands operands; switch (fetch()) { case OP_add: operands = pop_a_b(); return push(operands.a + operands.b); case OP_sub: operands = pop_a_b(); return push(operands.a - operands.b); case OP_mul: operands = pop_a_b(); return push(operands.a * operands.b); case OP_div_signed: operands = pop_a_b(); return push(operands.a / nonzero(operands.b)); case OP_div_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) / uint64_t(nonzero(operands.b))); case OP_rem_signed: operands = pop_a_b(); return push(operands.a % nonzero(operands.b)); case OP_rem_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) % uint64_t(nonzero(operands.b))); case OP_lsh: operands = pop_a_b(); return push(operands.a << operands.b); case OP_rsh_signed: operands = pop_a_b(); return push(operands.a >> operands.b); case OP_rsh_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) >> operands.b); case OP_log_not: return push(!pop_a()); case OP_bit_and: operands = pop_a_b(); return push(operands.a & operands.b); case OP_bit_or: operands = pop_a_b(); return push(operands.a | operands.b); case OP_bit_xor: operands = pop_a_b(); return push(operands.a ^ operands.b); case OP_bit_not: return push(~pop_a()); case OP_equal: operands = pop_a_b(); return push(operands.a == operands.b); case OP_less_signed: operands = pop_a_b(); return push(operands.a < operands.b); case OP_less_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) < uint64_t(operands.b)); case OP_ext: { int64_t n = nonzero(fetch()); if (n >= 64) { return; } int64_t a = pop_a(); int64_t n_mask = (int64_t(1) << n) - 1; int sign_bit = (a >> (n - 1)) & 1; return push((sign_bit * ~n_mask) | (a & n_mask)); } case OP_zero_ext: { int64_t n = fetch(); if (n >= 64) { return; } int64_t a = pop_a(); int64_t n_mask = (int64_t(1) << n) - 1; return push(a & n_mask); } case OP_ref8: return load(t); case OP_ref16: return load(t); case OP_ref32: return load(t); case OP_ref64: return load(t); case OP_dup: return pick(0); case OP_swap: operands = pop_a_b(); push(operands.b); return push(operands.a); case OP_pop: pop_a(); return; case OP_pick: return pick(fetch()); case OP_rot: { int64_t c = pop_a(); int64_t b = pop_a(); int64_t a = pop_a(); push(c); push(b); return push(a); } case OP_if_goto: { uint16_t offset = fetch(); if (pop_a()) { pc = offset; } return; } case OP_goto: pc = fetch(); return; case OP_const8: return push(fetch()); case OP_const16: return push(fetch()); case OP_const32: return push(fetch()); case OP_const64: return push(fetch()); case OP_reg: { GdbRegisterValue v = GdbServer::get_reg(t->regs(), t->extra_regs(), GdbRegister(fetch())); if (!v.defined) { set_error(); return; } switch (v.size) { case 1: return push(v.value1); case 2: return push(v.value2); case 4: return push(v.value4); case 8: return push(v.value8); } set_error(); return; } case OP_end: end = true; return; default: set_error(); return; } } const vector& bytecode; vector stack; size_t pc; bool error; bool end; }; #ifdef WORKAROUND_GDB_BUGS /* https://sourceware.org/bugzilla/show_bug.cgi?id=18617 means that * gdb generates incorrect operands for OP_ext and OP_zero_ext. 
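 * (Typically the generated width operand is larger than the width gdb
 * presumably meant --- 8, 16 or 32 bits --- which changes the result of
 * the sign- or zero-extension.)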
* We work around this bug by generating all the alternative programs that gdb * maybe should have generated, and evaluating all of them. If they agree on * the result, we return that as the correct result, otherwise we return * failure. */ static int count_variants(int bits) { int result = 1; if (bits > 8) { ++result; } if (bits > 16) { ++result; } if (bits > 32) { ++result; } return result; } template static T fetch(const uint8_t* data, size_t size, size_t pc) { if (pc + sizeof(T) > size) { return T(-1); } T v = 0; for (size_t i = 0; i < sizeof(T); ++i) { v = (v << 8) | data[pc + i]; } return v; } GdbExpression::GdbExpression(const uint8_t* data, size_t size) { vector instruction_starts; instruction_starts.resize(size); fill(instruction_starts.begin(), instruction_starts.end(), false); int64_t num_variants = 1; vector unvisited; unvisited.push_back(0); while (!unvisited.empty()) { size_t pc = unvisited.back(); unvisited.pop_back(); if (pc >= instruction_starts.size() || instruction_starts[pc]) { continue; } instruction_starts[pc] = true; switch (data[pc]) { case OP_ext: case OP_zero_ext: if (pc + 1 < size) { num_variants *= count_variants(data[pc + 1]); if (num_variants > 64) { // Too many variants, giving up on this expression return; } } unvisited.push_back(pc + 2); break; case OP_pick: case OP_const8: unvisited.push_back(pc + 2); break; case OP_if_goto: unvisited.push_back(fetch(data, size, pc + 1)); unvisited.push_back(pc + 3); break; case OP_goto: unvisited.push_back(fetch(data, size, pc + 1)); break; case OP_const16: case OP_reg: unvisited.push_back(pc + 3); break; case OP_const32: unvisited.push_back(pc + 5); break; case OP_const64: unvisited.push_back(pc + 9); break; case OP_end: break; default: unvisited.push_back(pc + 1); break; } } bytecode_variants.push_back(vector(data, data + size)); for (size_t i = 0; i < size; ++i) { if (!instruction_starts[i]) { continue; } if ((data[i] == OP_ext || data[i] == OP_zero_ext) && i + 1 < size) { uint8_t bits = data[i + 1]; vector> variants; for (auto& b : bytecode_variants) { // gdb perhaps should have used a smaller type width here --- 8, 16 or // 32 bits. if (bits > 8) { vector v = b; v[i + 1] = 8; variants.push_back(std::move(v)); } if (bits > 16) { vector v = b; v[i + 1] = 16; variants.push_back(std::move(v)); } if (bits > 32) { vector v = b; v[i + 1] = 32; variants.push_back(std::move(v)); } variants.push_back(std::move(b)); } bytecode_variants = std::move(variants); } } } #else GdbExpression::GdbExpression(const uint8_t* data, size_t size) { bytecode_variants.push_back(vector(data, data + size)); } #endif bool GdbExpression::evaluate(Task* t, Value* result) const { if (bytecode_variants.empty()) { return false; } bool first = true; for (auto& b : bytecode_variants) { ExpressionState state(b); for (int steps = 0; !state.end; ++steps) { if (steps >= 10000 || state.error) { return false; } state.step(t); } Value v = state.pop(); if (state.error) { return false; } if (first) { *result = v; first = false; } else if (*result != v) { return false; } } return true; } } // namespace rr rr-5.7.0/src/GdbExpression.h000066400000000000000000000022071450675474200157010ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_EXPRESSION_H_ #define RR_GDB_EXPRESSION_H_ #include #include #include namespace rr { class Task; /** * gdb has a simple bytecode language for writing expressions to be evaluated * in a remote target. This class implements evaluation of such expressions. 
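 * A minimal usage sketch (not taken from the sources; it only uses the
 * API declared below):
 *
 *   GdbExpression e(bytecode, len);
 *   GdbExpression::Value v;
 *   if (e.evaluate(task, &v)) {
 *     // v.i holds the 64-bit result
 *   }
 *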
* See https://sourceware.org/gdb/current/onlinedocs/gdb/Agent-Expressions.html */ class GdbExpression { public: GdbExpression(const uint8_t* data, size_t size); struct Value { Value(int64_t i = 0) : i(i) {} bool operator==(const Value& v) { return i == v.i; } bool operator!=(const Value& v) { return !(*this == v); } int64_t i; }; /** * If evaluation succeeds, store the final result in *result and return true. * Otherwise return false. */ bool evaluate(Task* t, Value* result) const; private: /** * To work around gdb bugs, we may generate and evaluate multiple versions of * the same expression program. */ std::vector> bytecode_variants; }; } // namespace rr #endif // RR_GDB_EXPRESSION_H_ rr-5.7.0/src/GdbInitCommand.cc000066400000000000000000000012211450675474200160750ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Command.h" #include "GdbServer.h" #include "main.h" using namespace std; namespace rr { class GdbInitCommand : public Command { public: virtual int run(vector& args) override; protected: GdbInitCommand(const char* name, const char* help) : Command(name, help) {} static GdbInitCommand singleton; }; GdbInitCommand GdbInitCommand::singleton("gdbinit", " rr gdbinit\n"); int GdbInitCommand::run(vector& args) { while (parse_global_option(args)) { } fputs(GdbServer::init_script().c_str(), stdout); return 0; } } // namespace rr rr-5.7.0/src/GdbRegister.h000066400000000000000000000075211450675474200153320ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_REGISTER_H_ #define RR_GDB_REGISTER_H_ namespace rr { /** * This is the register numbering used by GDB. */ enum GdbRegister { DREG_EAX, DREG_ECX, DREG_EDX, DREG_EBX, DREG_ESP, DREG_EBP, DREG_ESI, DREG_EDI, DREG_EIP, DREG_EFLAGS, DREG_CS, DREG_SS, DREG_DS, DREG_ES, DREG_FS, DREG_GS, DREG_FIRST_FXSAVE_REG, DREG_ST0 = DREG_FIRST_FXSAVE_REG, DREG_ST1, DREG_ST2, DREG_ST3, DREG_ST4, DREG_ST5, DREG_ST6, DREG_ST7, // These are the names GDB gives the registers. DREG_FCTRL, DREG_FSTAT, DREG_FTAG, DREG_FISEG, DREG_FIOFF, DREG_FOSEG, DREG_FOOFF, DREG_FOP, DREG_XMM0, DREG_XMM1, DREG_XMM2, DREG_XMM3, DREG_XMM4, DREG_XMM5, DREG_XMM6, DREG_XMM7, DREG_MXCSR, // XXX the last fxsave reg on *x86* DREG_LAST_FXSAVE_REG = DREG_MXCSR, DREG_ORIG_EAX, DREG_YMM0H, DREG_YMM1H, DREG_YMM2H, DREG_YMM3H, DREG_YMM4H, DREG_YMM5H, DREG_YMM6H, DREG_YMM7H, DREG_PKRU, DREG_NUM_LINUX_I386, // Last register we can find in user_regs_struct // (except for orig_eax). DREG_NUM_USER_REGS = DREG_GS + 1, // x86-64 register numbers DREG_RAX = 0, DREG_RBX, DREG_RCX, DREG_RDX, DREG_RSI, DREG_RDI, DREG_RBP, DREG_RSP, DREG_R8, DREG_R9, DREG_R10, DREG_R11, DREG_R12, DREG_R13, DREG_R14, DREG_R15, DREG_RIP, // Things get a little tricky here, because x86-64 has some registers // named identically to its x86 counterpart, but we've used the names // in the x86 register definitions above, and the numbers they need // to represent are different. Hence the unique names here. DREG_64_EFLAGS, DREG_64_CS, DREG_64_SS, DREG_64_DS, DREG_64_ES, DREG_64_FS, DREG_64_GS, DREG_64_FIRST_FXSAVE_REG, DREG_64_ST0 = DREG_64_FIRST_FXSAVE_REG, DREG_64_ST1, DREG_64_ST2, DREG_64_ST3, DREG_64_ST4, DREG_64_ST5, DREG_64_ST6, DREG_64_ST7, // These are the names GDB gives the registers. 
DREG_64_FCTRL, DREG_64_FSTAT, DREG_64_FTAG, DREG_64_FISEG, DREG_64_FIOFF, DREG_64_FOSEG, DREG_64_FOOFF, DREG_64_FOP, DREG_64_XMM0, DREG_64_XMM1, DREG_64_XMM2, DREG_64_XMM3, DREG_64_XMM4, DREG_64_XMM5, DREG_64_XMM6, DREG_64_XMM7, DREG_64_XMM8, DREG_64_XMM9, DREG_64_XMM10, DREG_64_XMM11, DREG_64_XMM12, DREG_64_XMM13, DREG_64_XMM14, DREG_64_XMM15, DREG_64_MXCSR, DREG_64_LAST_FXSAVE_REG = DREG_64_MXCSR, DREG_ORIG_RAX, DREG_FS_BASE, DREG_GS_BASE, DREG_64_YMM0H, DREG_64_YMM1H, DREG_64_YMM2H, DREG_64_YMM3H, DREG_64_YMM4H, DREG_64_YMM5H, DREG_64_YMM6H, DREG_64_YMM7H, DREG_64_YMM8H, DREG_64_YMM9H, DREG_64_YMM10H, DREG_64_YMM11H, DREG_64_YMM12H, DREG_64_YMM13H, DREG_64_YMM14H, DREG_64_YMM15H, DREG_64_PKRU, DREG_NUM_LINUX_X86_64, // Last register we can find in user_regs_struct (except for orig_rax). DREG_64_NUM_USER_REGS = DREG_64_GS + 1, // aarch64-core.xml DREG_X0 = 0, DREG_X1, DREG_X2, DREG_X3, DREG_X4, DREG_X5, DREG_X6, DREG_X7, DREG_X8, DREG_X9, DREG_X10, DREG_X11, DREG_X12, DREG_X13, DREG_X14, DREG_X15, DREG_X16, DREG_X17, DREG_X18, DREG_X19, DREG_X20, DREG_X21, DREG_X22, DREG_X23, DREG_X24, DREG_X25, DREG_X26, DREG_X27, DREG_X28, DREG_X29, DREG_X30, DREG_SP, DREG_PC, DREG_CPSR, // aarch64-fpu.xml DREG_V0 = 34, DREG_V1, DREG_V2, DREG_V3, DREG_V4, DREG_V5, DREG_V6, DREG_V7, DREG_V8, DREG_V9, DREG_V10, DREG_V11, DREG_V12, DREG_V13, DREG_V14, DREG_V15, DREG_V16, DREG_V17, DREG_V18, DREG_V19, DREG_V20, DREG_V21, DREG_V22, DREG_V23, DREG_V24, DREG_V25, DREG_V26, DREG_V27, DREG_V28, DREG_V29, DREG_V30, DREG_V31, DREG_FPSR, DREG_FPCR, DREG_NUM_LINUX_AARCH64 = DREG_FPCR + 1, }; } // namespace rr #endif /* RR_GDB_REGISTER_H_ */ rr-5.7.0/src/GdbServer.cc000066400000000000000000002313071450675474200151530ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "GdbServer.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "BreakpointCondition.h" #include "ElfReader.h" #include "Event.h" #include "GdbCommandHandler.h" #include "GdbExpression.h" #include "ReplaySession.h" #include "ReplayTask.h" #include "ScopedFd.h" #include "StringVectorToCharArray.h" #include "Task.h" #include "ThreadGroup.h" #include "core.h" #include "kernel_metadata.h" #include "log.h" #include "util.h" using namespace std; namespace rr { GdbServer::GdbServer(std::unique_ptr& dbg, Task* t) : dbg(std::move(dbg)), debuggee_tguid(t->thread_group()->tguid()), last_continue_tuid(t->tuid()), last_query_tuid(t->tuid()), final_event(UINT32_MAX), stop_replaying_to_target(false), interrupt_pending(false), exit_sigkill_pending(false), emergency_debug_session(&t->session()), file_scope_pid(0) { memset(&stop_siginfo, 0, sizeof(stop_siginfo)); } // Special-sauce macros defined by rr when launching the gdb client, // which implement functionality outside of the gdb remote protocol. // (Don't stare at them too long or you'll go blind ;).) 
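// The 'rr gdbinit' command (GdbInitCommand.cc) prints
// GdbServer::init_script() so that a manually started gdb client can load
// equivalent definitions; normally rr injects them when it launches gdb.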
static const string& gdb_rr_macros() { static string s; if (s.empty()) { stringstream ss; ss << GdbCommandHandler::gdb_macros() << "define restart\n" << " run c$arg0\n" << "end\n" << "document restart\n" << "restart at checkpoint N\n" << "checkpoints are created with the 'checkpoint' command\n" << "end\n" << "define seek-ticks\n" << " run t$arg0\n" << "end\n" << "document seek-ticks\n" << "restart at given ticks value\n" << "end\n" << "define jump\n" << " rr-denied jump\n" << "end\n" // In gdb version "Fedora 7.8.1-30.fc21", a raw "run" command // issued before any user-generated resume-execution command // results in gdb hanging just after the inferior hits an internal // gdb breakpoint. This happens outside of rr, with gdb // controlling gdbserver, as well. We work around that by // ensuring *some* resume-execution command has been issued before // restarting the session. But, only if the inferior hasn't // already finished execution ($_thread != 0). If it has and we // issue the "stepi" command, then gdb refuses to restart // execution. << "define hook-run\n" << " rr-hook-run\n" << "end\n" << "define hookpost-continue\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-step\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-stepi\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-next\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-nexti\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-finish\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-reverse-continue\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-reverse-step\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-reverse-stepi\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-reverse-finish\n" << " rr-set-suppress-run-hook 1\n" << "end\n" << "define hookpost-run\n" << " rr-set-suppress-run-hook 0\n" << "end\n" << "set unwindonsignal on\n" << "handle SIGURG stop\n" << "set prompt (rr) \n" // Try both "set target-async" and "maint set target-async" since // that changed recently. << "python\n" << "import re\n" << "m = re.compile(" << "'[^0-9]*([0-9]+)\\.([0-9]+)(\\.([0-9]+))?'" << ").match(gdb.VERSION)\n" << "ver = int(m.group(1))*10000 + int(m.group(2))*100\n" << "if m.group(4):\n" << " ver = ver + int(m.group(4))\n" << "\n" << "if ver == 71100:\n" << " gdb.write(" << "'This version of gdb (7.11.0) has known bugs that break rr. " << "Install 7.11.1 or later.\\n', gdb.STDERR)\n" << "\n" << "if ver < 71101:\n" << " gdb.execute('set target-async 0')\n" << " gdb.execute('maint set target-async 0')\n" << "end\n"; s = ss.str(); } return s; } /** * Attempt to find the value of |regname| (a DebuggerRegister * name), and if so (i) write it to |buf|; (ii) * set |*defined = true|; (iii) return the size of written * data. If |*defined == false|, the value of |buf| is * meaningless. * * This helper can fetch the values of both general-purpose * and "extra" registers. * * NB: |buf| must be large enough to hold the largest register * value that can be named by |regname|. 
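 * (GdbRegisterValue embeds such a buffer; GdbServer::get_reg below passes
 * |&reg.value[0]| straight through.)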
*/ static size_t get_reg(const Registers& regs, const ExtraRegisters& extra_regs, uint8_t* buf, GdbRegister regname, bool* defined) { size_t num_bytes = regs.read_register(buf, regname, defined); if (!*defined) { num_bytes = extra_regs.read_register(buf, regname, defined); } return num_bytes; } static bool set_reg(Task* target, const GdbRegisterValue& reg) { if (!reg.defined) { return false; } Registers regs = target->regs(); if (regs.write_register(reg.name, reg.value, reg.size)) { target->set_regs(regs); return true; } ExtraRegisters extra_regs = target->extra_regs(); if (extra_regs.write_register(reg.name, reg.value, reg.size)) { target->set_extra_regs(extra_regs); return true; } LOG(warn) << "Unhandled register name " << reg.name; return false; } /** * Return the register |which|, which may not have a defined value. */ GdbRegisterValue GdbServer::get_reg(const Registers& regs, const ExtraRegisters& extra_regs, GdbRegister which) { GdbRegisterValue reg; memset(®, 0, sizeof(reg)); reg.name = which; reg.size = rr::get_reg(regs, extra_regs, ®.value[0], which, ®.defined); return reg; } static GdbThreadId get_threadid(const Session& session, const TaskUid& tuid) { Task* t = session.find_task(tuid); pid_t pid = t ? t->tgid() : GdbThreadId::ANY.pid; return GdbThreadId(pid, tuid.tid()); } static GdbThreadId get_threadid(Task* t) { return GdbThreadId(t->tgid(), t->rec_tid); } static bool matches_threadid(const GdbThreadId& tid, const GdbThreadId& target) { return (target.pid <= 0 || target.pid == tid.pid) && (target.tid <= 0 || target.tid == tid.tid); } static bool matches_threadid(Task* t, const GdbThreadId& target) { GdbThreadId tid = get_threadid(t); return matches_threadid(tid, target); } static WatchType watchpoint_type(GdbRequestType req) { switch (req) { case DREQ_SET_HW_BREAK: case DREQ_REMOVE_HW_BREAK: return WATCH_EXEC; case DREQ_SET_WR_WATCH: case DREQ_REMOVE_WR_WATCH: return WATCH_WRITE; case DREQ_REMOVE_RDWR_WATCH: case DREQ_SET_RDWR_WATCH: // NB: x86 doesn't support read-only watchpoints (who would // ever want to use one?) so we treat them as readwrite // watchpoints and hope that gdb can figure out what's going // on. That is, if a user ever tries to set a read // watchpoint. case DREQ_REMOVE_RD_WATCH: case DREQ_SET_RD_WATCH: return WATCH_READWRITE; default: FATAL() << "Unknown dbg request " << req; return WatchType(-1); // not reached } } static void maybe_singlestep_for_event(Task* t, GdbRequest* req) { if (!t->session().is_replaying()) { return; } auto rt = static_cast(t); if (trace_instructions_up_to_event( rt->session().current_trace_frame().time())) { fputs("Stepping: ", stderr); t->regs().print_register_file_compact(stderr); fprintf(stderr, " ticks:%" PRId64 "\n", t->tick_count()); *req = GdbRequest(DREQ_CONT); req->suppress_debugger_stop = true; req->cont().actions.push_back( GdbContAction(ACTION_STEP, get_threadid(t->session(), t->tuid()))); } } void GdbServer::dispatch_regs_request(const Registers& regs, const ExtraRegisters& extra_regs) { GdbRegister end; // Send values for all the registers we sent XML register descriptions for. // Those descriptions are controlled by GdbConnection::cpu_features(). bool have_PKU = dbg->cpu_features() & GdbConnection::CPU_PKU; bool have_AVX = dbg->cpu_features() & GdbConnection::CPU_AVX; switch (regs.arch()) { case x86: end = have_PKU ? DREG_PKRU : (have_AVX ? DREG_YMM7H : DREG_ORIG_EAX); break; case x86_64: end = have_PKU ? DREG_64_PKRU : (have_AVX ? 
DREG_64_YMM15H : DREG_GS_BASE); break; case aarch64: end = DREG_FPCR; break; default: FATAL() << "Unknown architecture"; return; } vector rs; for (GdbRegister r = GdbRegister(0); r <= end; r = GdbRegister(r + 1)) { rs.push_back(get_reg(regs, extra_regs, r)); } dbg->reply_get_regs(rs); } class GdbBreakpointCondition : public BreakpointCondition { public: GdbBreakpointCondition(const vector>& bytecodes) { for (auto& b : bytecodes) { expressions.push_back(GdbExpression(b.data(), b.size())); } } virtual bool evaluate(Task* t) const override { for (auto& e : expressions) { GdbExpression::Value v; // Break if evaluation fails or the result is nonzero if (!e.evaluate(t, &v) || v.i != 0) { return true; } } return false; } private: vector expressions; }; static unique_ptr breakpoint_condition( const GdbRequest& request) { if (request.watch().conditions.empty()) { return nullptr; } return unique_ptr( new GdbBreakpointCondition(request.watch().conditions)); } static bool search_memory(Task* t, const MemoryRange& where, const vector& find, remote_ptr* result) { vector buf; buf.resize(page_size() + find.size() - 1); for (const auto& m : t->vm()->maps()) { MemoryRange r = MemoryRange(m.map.start(), m.map.end() + find.size() - 1) .intersect(where); // We basically read page by page here, but we read past the end of the // page to handle the case where a found string crosses page boundaries. // This approach isn't great for handling long search strings but gdb's find // command isn't really suited to that. // Reading page by page lets us avoid problems where some pages in a // mapping aren't readable (e.g. reading beyond end of file). while (r.size() >= find.size()) { ssize_t nread = t->read_bytes_fallible( r.start(), std::min(buf.size(), r.size()), buf.data()); if (nread >= ssize_t(find.size())) { void* found = memmem(buf.data(), nread, find.data(), find.size()); if (found) { *result = r.start() + (static_cast(found) - buf.data()); return true; } } r = MemoryRange( std::min(r.end(), floor_page_size(r.start()) + page_size()), r.end()); } } return false; } static bool is_in_patch_stubs(Task* t, remote_code_ptr ip) { auto p = ip.to_data_ptr(); return t->vm()->has_mapping(p) && (t->vm()->mapping_flags_of(p) & AddressSpace::Mapping::IS_PATCH_STUBS); } void GdbServer::maybe_intercept_mem_request(Task* target, const GdbRequest& req, vector* result) { DEBUG_ASSERT(req.mem_.len >= result->size()); /* Crazy hack! * When gdb tries to read the word at the top of the stack, and we're in our * dynamically-generated stub code, tell it the value is zero, so that gdb's * stack-walking code doesn't find a bogus value that it treats as a return * address and sets a breakpoint there, potentially corrupting program data. * gdb sometimes reads a whole block of memory around the stack pointer so * handle cases where the top-of-stack word is contained in a larger range. */ size_t size = word_size(target->arch()); if (target->regs().sp().as_int() >= req.mem_.addr && target->regs().sp().as_int() + size <= req.mem_.addr + result->size() && is_in_patch_stubs(target, target->ip())) { memset(result->data() + target->regs().sp().as_int() - req.mem_.addr, 0, size); } } void GdbServer::dispatch_debugger_request(Session& session, const GdbRequest& req, ReportState state) { DEBUG_ASSERT(!req.is_resume_request()); // These requests don't require a target task. 
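// The dispatch is tiered: first the requests that need no task at all,
// then thread-selection requests that may name a dead task, and finally
// requests that require a live target (we reply with
// notify_no_such_thread otherwise).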
switch (req.type) { case DREQ_RESTART: DEBUG_ASSERT(false); return; // unreached case DREQ_GET_CURRENT_THREAD: dbg->reply_get_current_thread(get_threadid(session, last_continue_tuid)); return; case DREQ_GET_OFFSETS: /* TODO */ dbg->reply_get_offsets(); return; case DREQ_GET_THREAD_LIST: { vector tids; if (state != REPORT_THREADS_DEAD) { for (auto& kv : session.tasks()) { tids.push_back(get_threadid(session, kv.second->tuid())); } } dbg->reply_get_thread_list(tids); return; } case DREQ_INTERRUPT: { Task* t = session.find_task(last_continue_tuid); ASSERT(t, session.is_diversion()) << "Replay interrupts should be handled at a higher level"; DEBUG_ASSERT(!t || t->thread_group()->tguid() == debuggee_tguid); dbg->notify_stop(t ? get_threadid(t) : GdbThreadId(), 0); memset(&stop_siginfo, 0, sizeof(stop_siginfo)); if (t) { last_query_tuid = last_continue_tuid = t->tuid(); } return; } case DREQ_GET_EXEC_FILE: { // We shouldn't normally receive this since we try to pass the exe file // name on gdb's command line, but the user might start gdb manually // and this is easy to support in case some other debugger or // configuration needs it. Task* t = nullptr; if (req.target.tid) { ThreadGroup* tg = session.find_thread_group(req.target.tid); if (tg) { t = *tg->task_set().begin(); } } else { t = session.find_task(last_continue_tuid); } if (t) { dbg->reply_get_exec_file(t->vm()->exe_image()); } else { dbg->reply_get_exec_file(string()); } return; } case DREQ_FILE_SETFS: // Only the filesystem as seen by the remote stub is supported currently file_scope_pid = req.file_setfs().pid; dbg->reply_setfs(0); return; case DREQ_FILE_OPEN: // We only support reading files if (req.file_open().flags == O_RDONLY) { Task* t = session.find_task(last_continue_tuid); int fd = open_file(session, t, req.file_open().file_name); dbg->reply_open(fd, fd >= 0 ? 0 : ENOENT); } else { dbg->reply_open(-1, EACCES); } return; case DREQ_FILE_PREAD: { GdbRequest::FilePread read_req = req.file_pread(); { auto it = files.find(read_req.fd); if (it != files.end()) { size_t size = min(read_req.size, 1024 * 1024); vector data; data.resize(size); ssize_t bytes = read_to_end(it->second, read_req.offset, data.data(), size); dbg->reply_pread(data.data(), bytes, bytes >= 0 ? 0 : -errno); return; } } { auto it = memory_files.find(read_req.fd); if (it != memory_files.end() && timeline.is_running()) { // Search our mmap stream for a record that can satisfy this request TraceReader tmp_reader(timeline.current_session().trace_reader()); tmp_reader.rewind(); while (true) { TraceReader::MappedData data; bool found; KernelMapping km = tmp_reader.read_mapped_region( &data, &found, TraceReader::DONT_VALIDATE, TraceReader::ANY_TIME); if (!found) break; if (it->second == FileId(km)) { if (data.source != TraceReader::SOURCE_FILE) { LOG(warn) << "Not serving file because it is not a file source"; break; } ScopedFd fd(data.file_name.c_str(), O_RDONLY); vector data; data.resize(read_req.size); LOG(debug) << "Reading " << read_req.size << " bytes at offset " << read_req.offset; ssize_t bytes = read_to_end(fd, read_req.offset, data.data(), read_req.size); if (bytes < (ssize_t)read_req.size) { LOG(warn) << "Requested " << read_req.size << " bytes but only got " << bytes; } dbg->reply_pread(data.data(), bytes, bytes >= 0 ? 
0 : -errno); return; } } LOG(warn) << "No mapping found"; } } LOG(warn) << "Unknown file descriptor requested"; dbg->reply_pread(nullptr, 0, EIO); return; } case DREQ_FILE_CLOSE: { { auto it = files.find(req.file_close().fd); if (it != files.end()) { files.erase(it); dbg->reply_close(0); return; } } { auto it = memory_files.find(req.file_close().fd); if (it != memory_files.end()) { memory_files.erase(it); dbg->reply_close(0); return; } } LOG(warn) << "Unable to find file descriptor for close"; dbg->reply_close(EBADF); return; } default: /* fall through to next switch stmt */ break; } bool is_query = req.type != DREQ_SET_CONTINUE_THREAD; Task* target = req.target.tid > 0 ? session.find_task(req.target.tid) : session.find_task(is_query ? last_query_tuid : last_continue_tuid); if (target) { if (is_query) { last_query_tuid = target->tuid(); } else { last_continue_tuid = target->tuid(); } } // These requests query or manipulate which task is the // target, so it's OK if the task doesn't exist. switch (req.type) { case DREQ_GET_IS_THREAD_ALIVE: dbg->reply_get_is_thread_alive(target != nullptr); return; case DREQ_GET_THREAD_EXTRA_INFO: dbg->reply_get_thread_extra_info(target->name()); return; case DREQ_SET_CONTINUE_THREAD: dbg->reply_select_thread(target != nullptr); return; case DREQ_SET_QUERY_THREAD: dbg->reply_select_thread(target != nullptr); return; default: // fall through to next switch stmt break; } // These requests require a valid target task. We don't trust // the debugger to use the information provided above to only // query valid tasks. if (!target) { dbg->notify_no_such_thread(req); return; } switch (req.type) { case DREQ_GET_AUXV: { dbg->reply_get_auxv(target->vm()->saved_auxv()); return; } case DREQ_GET_MEM: { vector mem; mem.resize(req.mem().len); ssize_t nread = target->read_bytes_fallible(req.mem().addr, req.mem().len, mem.data()); mem.resize(max(ssize_t(0), nread)); target->vm()->replace_breakpoints_with_original_values( mem.data(), mem.size(), req.mem().addr); maybe_intercept_mem_request(target, req, &mem); dbg->reply_get_mem(mem); return; } case DREQ_SET_MEM: { // gdb has been observed to send requests of length 0 at // odd times // (e.g. before sending the magic write to create a checkpoint) if (req.mem().len == 0) { dbg->reply_set_mem(true); return; } // If an address is recognised as belonging to a SystemTap semaphore it's // because it was detected by the audit library during recording and // pre-incremented. if (target->vm()->is_stap_semaphore(req.mem().addr)) { LOG(info) << "Suppressing write to SystemTap semaphore"; dbg->reply_set_mem(true); return; } // We only allow the debugger to write memory if the // memory will be written to an diversion session. // Arbitrary writes to replay sessions cause // divergence. 
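// (E.g. poking a variable during replay could steer a later syscall away
// from its recorded behavior, and replay would then diverge and assert.)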
if (!session.is_diversion()) { LOG(error) << "Attempt to write memory outside diversion session"; dbg->reply_set_mem(false); return; } LOG(debug) << "Writing " << req.mem().len << " bytes to " << HEX(req.mem().addr); // TODO fallible target->write_bytes_helper(req.mem().addr, req.mem().len, req.mem().data.data()); dbg->reply_set_mem(true); return; } case DREQ_SEARCH_MEM: { remote_ptr addr; bool found = search_memory(target, MemoryRange(req.mem().addr, req.mem().len), req.mem().data, &addr); dbg->reply_search_mem(found, addr); return; } case DREQ_GET_REG: { GdbRegisterValue reg = get_reg(target->regs(), target->extra_regs(), req.reg().name); dbg->reply_get_reg(reg); return; } case DREQ_GET_REGS: { dispatch_regs_request(target->regs(), target->extra_regs()); return; } case DREQ_SET_REG: { if (!session.is_diversion()) { // gdb sets orig_eax to -1 during a restart. For a // replay session this is not correct (we might be // restarting from an rr checkpoint inside a system // call, and we must not tamper with replay state), so // just ignore it. if ((target->arch() == x86 && req.reg().name == DREG_ORIG_EAX) || (target->arch() == x86_64 && req.reg().name == DREG_ORIG_RAX)) { dbg->reply_set_reg(true); return; } LOG(error) << "Attempt to write register outside diversion session"; dbg->reply_set_reg(false); return; } if (!set_reg(target, req.reg())) { LOG(warn) << "Attempt to set register " << req.reg().name << " failed"; } dbg->reply_set_reg(true /*currently infallible*/); return; } case DREQ_GET_STOP_REASON: { dbg->reply_get_stop_reason(get_threadid(session, last_continue_tuid), stop_siginfo.si_signo); return; } case DREQ_SET_SW_BREAK: { ASSERT(target, req.watch().kind == bkpt_instruction_length(target->arch())) << "Debugger setting bad breakpoint insn"; // Mirror all breakpoint/watchpoint sets/unsets to the target process // if it's not part of the timeline (i.e. it's a diversion). 
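// A diversion executes in a cloned address space that the timeline's
// breakpoint bookkeeping does not cover, hence the extra add_breakpoint
// call on |target->vm()| below.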
ReplayTask* replay_task = timeline.current_session().find_task(target->tuid()); bool ok = timeline.add_breakpoint(replay_task, req.watch().addr, breakpoint_condition(req)); if (ok && &session != &timeline.current_session()) { bool diversion_ok = target->vm()->add_breakpoint(req.watch().addr, BKPT_USER); ASSERT(target, diversion_ok); } dbg->reply_watchpoint_request(ok); return; } case DREQ_SET_HW_BREAK: case DREQ_SET_RD_WATCH: case DREQ_SET_WR_WATCH: case DREQ_SET_RDWR_WATCH: { ReplayTask* replay_task = timeline.current_session().find_task(target->tuid()); bool ok = timeline.add_watchpoint( replay_task, req.watch().addr, req.watch().kind, watchpoint_type(req.type), breakpoint_condition(req)); if (ok && &session != &timeline.current_session()) { bool diversion_ok = target->vm()->add_watchpoint( req.watch().addr, req.watch().kind, watchpoint_type(req.type)); ASSERT(target, diversion_ok); } dbg->reply_watchpoint_request(ok); return; } case DREQ_REMOVE_SW_BREAK: { ReplayTask* replay_task = timeline.current_session().find_task(target->tuid()); timeline.remove_breakpoint(replay_task, req.watch().addr); if (&session != &timeline.current_session()) { target->vm()->remove_breakpoint(req.watch().addr, BKPT_USER); } dbg->reply_watchpoint_request(true); return; } case DREQ_REMOVE_HW_BREAK: case DREQ_REMOVE_RD_WATCH: case DREQ_REMOVE_WR_WATCH: case DREQ_REMOVE_RDWR_WATCH: { ReplayTask* replay_task = timeline.current_session().find_task(target->tuid()); timeline.remove_watchpoint(replay_task, req.watch().addr, req.watch().kind, watchpoint_type(req.type)); if (&session != &timeline.current_session()) { target->vm()->remove_watchpoint(req.watch().addr, req.watch().kind, watchpoint_type(req.type)); } dbg->reply_watchpoint_request(true); return; } case DREQ_READ_SIGINFO: { vector si_bytes; si_bytes.resize(req.mem().len); memset(si_bytes.data(), 0, si_bytes.size()); memcpy(si_bytes.data(), &stop_siginfo, min(si_bytes.size(), sizeof(stop_siginfo))); dbg->reply_read_siginfo(si_bytes); return; } case DREQ_WRITE_SIGINFO: LOG(warn) << "WRITE_SIGINFO request outside of diversion session"; dbg->reply_write_siginfo(); return; case DREQ_RR_CMD: dbg->reply_rr_cmd( GdbCommandHandler::process_command(*this, target, req.text())); return; #ifdef PROC_SERVICE_H case DREQ_QSYMBOL: { // When gdb sends "qSymbol::", it means that gdb is ready to // respond to symbol requests. This can be sent multiple times // during the course of a session -- gdb sends it whenever // something in the inferior has changed, making it possible // that previous failed symbol lookups could now succeed. In // response to a qSymbol request from gdb, we either send back a // qSymbol response, requesting the address of a symbol; or we // send back OK. We have to do this as an ordinary response and // maintain our own state explicitly, as opposed to simply // reading another packet from gdb, because when gdb looks up a // symbol it might send other requests that must be served. So, // we keep a copy of the symbol names, and an iterator into this // copy. When gdb sends a plain "qSymbol::" packet, because gdb // has detected some change in the inferior state that might // enable more symbol lookups, we restart the iterator. if (!thread_db) { thread_db = std::unique_ptr(new ThreadDb(debuggee_tguid.tid())); } const string& name = req.sym().name; if (req.sym().has_address) { // Got a response holding a previously-requested symbol's name // and address. 
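// (These addresses feed ThreadDb, which proxies libthread_db symbol
// lookups through gdb's qSymbol mechanism rather than reading symbol
// tables itself.)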
thread_db->register_symbol(name, req.sym().address); } else if (name == "") { // Plain "qSymbol::" request. symbols = thread_db->get_symbols_and_clear_map(target->thread_group().get()); symbols_iter = symbols.begin(); } if (symbols_iter == symbols.end()) { dbg->qsymbols_finished(); } else { string symbol = *symbols_iter++; dbg->send_qsymbol(symbol); } return; } case DREQ_TLS: { if (!thread_db) { thread_db = std::unique_ptr(new ThreadDb(debuggee_tguid.tid())); } remote_ptr address; bool ok = thread_db->get_tls_address(target->thread_group().get(), target->rec_tid, req.tls().offset, req.tls().load_module, &address); dbg->reply_tls_addr(ok, address); return; } #endif default: FATAL() << "Unknown debugger request " << req.type; } } static bool any_action_targets_match(const Session& session, const TaskUid& tuid, const vector& actions) { GdbThreadId tid = get_threadid(session, tuid); return any_of(actions.begin(), actions.end(), [tid](GdbContAction action) { return matches_threadid(tid, action.target); }); } static Task* find_first_task_matching_target( const Session& session, const vector& actions) { const Session::TaskMap& tasks = session.tasks(); auto it = find_first_of( tasks.begin(), tasks.end(), actions.begin(), actions.end(), [](Session::TaskMap::value_type task_pair, GdbContAction action) { return matches_threadid(task_pair.second, action.target); }); return it != tasks.end() ? it->second : nullptr; } bool GdbServer::diverter_process_debugger_requests( DiversionSession& diversion_session, uint32_t& diversion_refcount, GdbRequest* req) { while (true) { *req = dbg->get_request(); if (req->is_resume_request()) { const vector& actions = req->cont().actions; DEBUG_ASSERT(actions.size() > 0); // GDB may ask us to resume more than one task, so we have to // choose one. We give priority to the task last resumed, as // this is likely to be the context in which GDB is executing // code; selecting any other task runs the risk of resuming // replay, denying the diverted code an opportunity to complete // and end the diversion session. if (!any_action_targets_match(diversion_session, last_continue_tuid, actions)) { // If none of the resumption targets match the task last // resumed, we simply choose any matching task. This ensures // that GDB (and the user) can choose an arbitrary thread to // serve as the context of the code being evaluated. // TODO: maybe it makes sense to try and select the matching // task that was most recently resumed, or possibly the // matching task with an event in the replay trace nearest to // 'now'. Task* task = find_first_task_matching_target(diversion_session, actions); DEBUG_ASSERT(task != nullptr); last_continue_tuid = task->tuid(); } return diversion_refcount > 0; } switch (req->type) { case DREQ_RESTART: case DREQ_DETACH: diversion_refcount = 0; return false; case DREQ_READ_SIGINFO: { LOG(debug) << "Adding ref to diversion session"; ++diversion_refcount; // TODO: maybe share with replayer.cc? vector si_bytes; si_bytes.resize(req->mem().len); memset(si_bytes.data(), 0, si_bytes.size()); dbg->reply_read_siginfo(si_bytes); continue; } case DREQ_SET_QUERY_THREAD: { if (req->target.tid) { Task* next = diversion_session.find_task(req->target.tid); if (next) { last_query_tuid = next->tuid(); } } break; } case DREQ_WRITE_SIGINFO: LOG(debug) << "Removing reference to diversion session ..."; DEBUG_ASSERT(diversion_refcount > 0); --diversion_refcount; if (diversion_refcount == 0) { LOG(debug) << " ... 
dying at next continue request"; } dbg->reply_write_siginfo(); continue; case DREQ_RR_CMD: { DEBUG_ASSERT(req->type == DREQ_RR_CMD); Task* task = diversion_session.find_task(last_continue_tuid); if (task) { std::string reply = GdbCommandHandler::process_command(*this, task, req->text()); // Certain commands cause the diversion to end immediately // while other commands must work within a diversion. if (reply == GdbCommandHandler::cmd_end_diversion()) { diversion_refcount = 0; return false; } dbg->reply_rr_cmd(reply); continue; } else { diversion_refcount = 0; return false; } break; } default: break; } dispatch_debugger_request(diversion_session, *req, REPORT_NORMAL); } } static bool is_last_thread_exit(const BreakStatus& break_status) { // The task set may be empty if the task has already exited. return break_status.task_exit && break_status.task_context.thread_group->task_set().size() <= 1; } static Task* is_in_exec(ReplayTimeline& timeline) { Task* t = timeline.current_session().current_task(); if (!t) { return nullptr; } return timeline.current_session().next_step_is_successful_exec_syscall_exit() ? t : nullptr; } void GdbServer::maybe_notify_stop(const GdbRequest& req, const BreakStatus& break_status) { bool do_stop = false; remote_ptr watch_addr; char watch[1024]; watch[0] = '\0'; if (!break_status.watchpoints_hit.empty()) { do_stop = true; memset(&stop_siginfo, 0, sizeof(stop_siginfo)); stop_siginfo.si_signo = SIGTRAP; watch_addr = break_status.watchpoints_hit[0].addr; bool any_hw_break = false; for (const auto& w : break_status.watchpoints_hit) { if (w.type == WATCH_EXEC) { any_hw_break = true; } } if (dbg->hwbreak_supported() && any_hw_break) { snprintf(watch, sizeof(watch) - 1, "hwbreak:;"); } else if (watch_addr) { snprintf(watch, sizeof(watch) - 1, "watch:%" PRIxPTR ";", watch_addr.as_int()); } LOG(debug) << "Stopping for watchpoint at " << watch_addr; } if (break_status.breakpoint_hit || break_status.singlestep_complete) { do_stop = true; memset(&stop_siginfo, 0, sizeof(stop_siginfo)); stop_siginfo.si_signo = SIGTRAP; if (break_status.breakpoint_hit) { if (dbg->swbreak_supported()) { snprintf(watch, sizeof(watch) - 1, "swbreak:;"); } LOG(debug) << "Stopping for breakpoint"; } else { LOG(debug) << "Stopping for singlestep"; } } if (break_status.signal) { do_stop = true; stop_siginfo = *break_status.signal; LOG(debug) << "Stopping for signal " << stop_siginfo; } if (is_last_thread_exit(break_status)) { if (break_status.task_context.session->is_diversion()) { // If the last task of a diversion session has exited, we need // to make sure GDB knows it's unrecoverable. There's no good // way to do this: a stop is insufficient, but an inferior exit // typically signals the end of a debugging session. Using the // latter approach appears to work, but stepping through GDB's // processing of the event seems to indicate it isn't really // supposed to. FIXME. LOG(debug) << "Last task of diversion exiting. " << "Notifying exit with synthetic SIGKILL"; dbg->notify_exit_signal(SIGKILL); return; } else if (dbg->features().reverse_execution) { do_stop = true; memset(&stop_siginfo, 0, sizeof(stop_siginfo)); if (req.cont().run_direction == RUN_FORWARD) { // The exit of the last task in a thread group generates a fake SIGKILL, // when reverse-execution is enabled, because users often want to run // backwards from the end of the task. stop_siginfo.si_signo = SIGKILL; LOG(debug) << "Stopping for synthetic SIGKILL"; } else { // The start of the debuggee task-group should trigger a silent stop. 
stop_siginfo.si_signo = 0; LOG(debug) << "Stopping at start of execution while running backwards"; } } } Task* t = break_status.task(); Task* in_exec_task = is_in_exec(timeline); if (in_exec_task) { do_stop = true; memset(&stop_siginfo, 0, sizeof(stop_siginfo)); t = in_exec_task; LOG(debug) << "Stopping at exec"; } if (do_stop && t->thread_group()->tguid() == debuggee_tguid) { /* Notify the debugger and process any new requests * that might have triggered before resuming. */ dbg->notify_stop(get_threadid(t), stop_siginfo.si_signo, watch); last_query_tuid = last_continue_tuid = t->tuid(); } } static RunCommand compute_run_command_from_actions(Task* t, const GdbRequest& req, int* signal_to_deliver) { for (auto& action : req.cont().actions) { if (matches_threadid(t, action.target)) { // We can only run task |t|; neither diversion nor replay sessions // support running multiple threads. So even if gdb tells us to continue // multiple threads, we don't do that. *signal_to_deliver = action.signal_to_deliver; return action.type == ACTION_STEP ? RUN_SINGLESTEP : RUN_CONTINUE; } } // gdb told us to run (or step) some thread that's not |t|, without resuming // |t|. It sometimes does this even though its target thread is entering a // blocking syscall and |t| must run before gdb's target thread can make // progress. So, allow |t| to run anyway. *signal_to_deliver = 0; return RUN_CONTINUE; } struct AllowedTasks { TaskUid task; // tid 0 means 'any member of debuggee_tguid' RunCommand command; }; static RunCommand compute_run_command_for_reverse_exec( Session& session, const ThreadGroupUid& debuggee_tguid, const GdbRequest& req, vector& allowed_tasks) { // Singlestep if any of the actions request singlestepping. RunCommand result = RUN_CONTINUE; for (auto& action : req.cont().actions) { if (action.target.pid > 0 && action.target.pid != debuggee_tguid.tid()) { continue; } AllowedTasks allowed; allowed.command = RUN_CONTINUE; if (action.type == ACTION_STEP) { allowed.command = result = RUN_SINGLESTEP; } if (action.target.tid > 0) { Task* t = session.find_task(action.target.tid); if (t) { allowed.task = t->tuid(); } } allowed_tasks.push_back(allowed); } return result; } /** * Create a new diversion session using |replay| session as the * template. The |replay| session isn't mutated. * * Execution begins in the new diversion session under the control of * |dbg| starting with initial thread target |task|. The diversion * session ends at the request of |dbg|, and |req| returns the first * request made that wasn't handled by the diversion session. That * is, the first request that should be handled by |replay| upon * resuming execution in that session. */ GdbRequest GdbServer::divert(ReplaySession& replay) { GdbRequest req; LOG(debug) << "Starting debugging diversion for " << &replay; if (timeline.is_running()) { // Ensure breakpoints and watchpoints are applied before we fork the // diversion, to ensure the diversion is consistent with the timeline // breakpoint/watchpoint state. timeline.apply_breakpoints_and_watchpoints(); } DiversionSession::shr_ptr diversion_session = replay.clone_diversion(); uint32_t diversion_refcount = 1; TaskUid saved_query_tuid = last_query_tuid; TaskUid saved_continue_tuid = last_continue_tuid; while (diverter_process_debugger_requests(*diversion_session, diversion_refcount, &req)) { DEBUG_ASSERT(req.is_resume_request()); if (req.cont().run_direction == RUN_BACKWARD) { // We don't support reverse execution in a diversion. Just issue // an immediate stop. 
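// (A diversion runs new, unrecorded code, so there is no trace to rewind
// through.)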
dbg->notify_stop(get_threadid(*diversion_session, last_continue_tuid), 0); memset(&stop_siginfo, 0, sizeof(stop_siginfo)); last_query_tuid = last_continue_tuid; continue; } Task* t = diversion_session->find_task(last_continue_tuid); DEBUG_ASSERT(t != nullptr); int signal_to_deliver; RunCommand command = compute_run_command_from_actions(t, req, &signal_to_deliver); auto result = diversion_session->diversion_step(t, command, signal_to_deliver); if (result.status == DiversionSession::DIVERSION_EXITED) { diversion_refcount = 0; maybe_notify_stop(req, result.break_status); if (timeline.is_running()) { // gdb assumes that the process is gone and all its // breakpoints have gone with it. It will set new breakpoints. timeline.remove_breakpoints_and_watchpoints(); } req = GdbRequest(DREQ_NONE); break; } DEBUG_ASSERT(result.status == DiversionSession::DIVERSION_CONTINUE); maybe_notify_stop(req, result.break_status); } LOG(debug) << "... ending debugging diversion"; DEBUG_ASSERT(diversion_refcount == 0); diversion_session->kill_all_tasks(); last_query_tuid = saved_query_tuid; last_continue_tuid = saved_continue_tuid; return req; } /** * Reply to debugger requests until the debugger asks us to resume * execution, detach, restart, or interrupt. */ GdbRequest GdbServer::process_debugger_requests(ReportState state) { while (true) { GdbRequest req = dbg->get_request(); req.suppress_debugger_stop = false; try_lazy_reverse_singlesteps(req); if (req.type == DREQ_READ_SIGINFO) { vector si_bytes; si_bytes.resize(req.mem().len); memset(si_bytes.data(), 0, si_bytes.size()); memcpy(si_bytes.data(), &stop_siginfo, min(si_bytes.size(), sizeof(stop_siginfo))); dbg->reply_read_siginfo(si_bytes); // READ_SIGINFO is usually the start of a diversion. It can also be // triggered by "print $_siginfo" but that is rare so we just assume it's // a diversion start; if "print $_siginfo" happens we'll print the correct // siginfo and then incorrectly start a diversion and go haywire :-(. // Ideally we'd come up with a better way to detect diversions so that // "print $_siginfo" works. req = divert(timeline.current_session()); if (req.type == DREQ_NONE) { continue; } // Carry on to process the request that was rejected by // the diversion session } if (req.is_resume_request()) { Task* t = current_session().find_task(last_continue_tuid); if (t) { maybe_singlestep_for_event(t, &req); } return req; } if (req.type == DREQ_INTERRUPT) { LOG(debug) << " request to interrupt"; return req; } if (req.type == DREQ_RESTART) { // Debugger client requested that we restart execution // from the beginning. Restart our debug session. 
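// (The caller performs the actual seek: debug_one_step() routes this
// request through detach_or_restart() into restart_session().)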
LOG(debug) << " request to restart at event " << req.restart().param; return req; } if (req.type == DREQ_DETACH) { LOG(debug) << " debugger detached"; dbg->reply_detach(); return req; } dispatch_debugger_request(current_session(), req, state); } } void GdbServer::try_lazy_reverse_singlesteps(GdbRequest& req) { if (!timeline.is_running()) { return; } ReplayTimeline::Mark now; bool need_seek = false; ReplayTask* t = timeline.current_session().current_task(); while (t && req.type == DREQ_CONT && req.cont().run_direction == RUN_BACKWARD && req.cont().actions.size() == 1 && req.cont().actions[0].type == ACTION_STEP && req.cont().actions[0].signal_to_deliver == 0 && matches_threadid(t, req.cont().actions[0].target) && !req.suppress_debugger_stop) { if (!now) { now = timeline.mark(); } ReplayTimeline::Mark previous = timeline.lazy_reverse_singlestep(now, t); if (!previous) { break; } now = previous; need_seek = true; BreakStatus break_status; break_status.task_context = TaskContext(t); break_status.singlestep_complete = true; LOG(debug) << " using lazy reverse-singlestep"; maybe_notify_stop(req, break_status); while (true) { req = dbg->get_request(); req.suppress_debugger_stop = false; if (req.type != DREQ_GET_REGS) { break; } LOG(debug) << " using lazy reverse-singlestep registers"; dispatch_regs_request(now.regs(), now.extra_regs()); } } if (need_seek) { timeline.seek_to_mark(now); } } bool GdbServer::detach_or_restart(const GdbRequest& req, ContinueOrStop* s) { if (DREQ_RESTART == req.type) { restart_session(req); *s = CONTINUE_DEBUGGING; return true; } if (DREQ_DETACH == req.type) { *s = STOP_DEBUGGING; return true; } return false; } GdbServer::ContinueOrStop GdbServer::handle_exited_state( GdbRequest& last_resume_request) { // TODO return real exit code, if it's useful. dbg->notify_exit_code(0); final_event = timeline.current_session().trace_reader().time(); GdbRequest req = process_debugger_requests(REPORT_THREADS_DEAD); ContinueOrStop s; if (detach_or_restart(req, &s)) { last_resume_request = GdbRequest(); return s; } FATAL() << "Received continue/interrupt request after end-of-trace."; return STOP_DEBUGGING; } GdbServer::ContinueOrStop GdbServer::debug_one_step( GdbRequest& last_resume_request) { ReplayResult result; GdbRequest req; if (in_debuggee_end_state) { // Treat the state where the last thread is about to exit like // termination. req = process_debugger_requests(); // If it's a forward execution request, fake the exited state. if (req.is_resume_request() && req.cont().run_direction == RUN_FORWARD) { if (interrupt_pending) { // Just process this. We're getting it after a restart. } else { return handle_exited_state(last_resume_request); } } else { if (req.type != DREQ_DETACH) { in_debuggee_end_state = false; } } // Otherwise (e.g. detach, restart, interrupt or reverse-exec) process // the request as normal. } else if (!interrupt_pending || last_resume_request.type == DREQ_NONE) { req = process_debugger_requests(); } else { req = last_resume_request; } ContinueOrStop s; if (detach_or_restart(req, &s)) { last_resume_request = GdbRequest(); return s; } if (req.is_resume_request()) { last_resume_request = req; } else { DEBUG_ASSERT(req.type == DREQ_INTERRUPT); interrupt_pending = true; req = last_resume_request; DEBUG_ASSERT(req.is_resume_request()); } if (interrupt_pending) { Task* t = timeline.current_session().current_task(); if (t->thread_group()->tguid() == debuggee_tguid) { interrupt_pending = false; dbg->notify_stop(get_threadid(t), in_debuggee_end_state ? 
SIGKILL : 0); memset(&stop_siginfo, 0, sizeof(stop_siginfo)); return CONTINUE_DEBUGGING; } } if (exit_sigkill_pending) { Task* t = timeline.current_session().current_task(); if (t->thread_group()->tguid() == debuggee_tguid) { exit_sigkill_pending = false; if (req.cont().run_direction == RUN_FORWARD) { dbg->notify_stop(get_threadid(t), SIGKILL); memset(&stop_siginfo, 0, sizeof(stop_siginfo)); return CONTINUE_DEBUGGING; } } } if (req.cont().run_direction == RUN_FORWARD) { if (is_in_exec(timeline) && timeline.current_session().current_task()->thread_group()->tguid() == debuggee_tguid) { // Don't go any further forward. maybe_notify_stop will generate a // stop. result = ReplayResult(); } else { int signal_to_deliver; RunCommand command = compute_run_command_from_actions( timeline.current_session().current_task(), req, &signal_to_deliver); // Ignore gdb's |signal_to_deliver|; we just have to follow the replay. result = timeline.replay_step_forward(command); } if (result.status == REPLAY_EXITED) { return handle_exited_state(last_resume_request); } } else { vector allowed_tasks; // Convert the tids in GdbContActions into TaskUids to avoid issues // if tids get reused. RunCommand command = compute_run_command_for_reverse_exec( timeline.current_session(), debuggee_tguid, req, allowed_tasks); auto stop_filter = [&](ReplayTask* t, const BreakStatus &break_status) -> bool { if (t->thread_group()->tguid() != debuggee_tguid) { return false; } // don't stop for a signal that has been specified by QPassSignal if (break_status.signal && dbg->is_pass_signal(break_status.signal->si_signo)) { LOG(debug) << "Filtering out event for signal " << break_status.signal->si_signo; return false; } // If gdb's requested actions don't allow the task to run, we still // let it run (we can't do anything else, since we're replaying), but // we won't report stops in that task. for (auto& a : allowed_tasks) { if (a.task.tid() == 0 || a.task == t->tuid()) { return true; } } return false; }; auto interrupt_check = [&]() { return dbg->sniff_packet(); }; switch (command) { case RUN_CONTINUE: result = timeline.reverse_continue(stop_filter, interrupt_check); break; case RUN_SINGLESTEP: { Task* t = timeline.current_session().find_task(last_continue_tuid); DEBUG_ASSERT(t); result = timeline.reverse_singlestep( last_continue_tuid, t->tick_count(), stop_filter, interrupt_check); break; } default: DEBUG_ASSERT(0 && "Unknown RunCommand"); } if (result.status == REPLAY_EXITED) { return handle_exited_state(last_resume_request); } } if (!req.suppress_debugger_stop) { maybe_notify_stop(req, result.break_status); } if (req.cont().run_direction == RUN_FORWARD && is_last_thread_exit(result.break_status) && result.break_status.task_context.thread_group->tguid() == debuggee_tguid) { in_debuggee_end_state = true; } return CONTINUE_DEBUGGING; } static bool target_event_reached(const ReplayTimeline& timeline, const GdbServer::Target& target, const ReplayResult& result) { if (target.event == -1) { return is_last_thread_exit(result.break_status) && (target.pid <= 0 || result.break_status.task_context.thread_group->tgid == target.pid); } else { return timeline.current_session().current_trace_frame().time() > target.event; } } bool GdbServer::at_target(ReplayResult& result) { // Don't launch the debugger for the initial rr fork child. // No one ever wants that to happen. 
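// (Concretely: wait until the tracee has completed its first exec, per
// done_initial_exec() below.)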
if (!timeline.current_session().done_initial_exec()) { return false; } Task* t = timeline.current_session().current_task(); if (!t) { return false; } bool target_is_exit = target.event == -1; if (!(timeline.can_add_checkpoint() || target_is_exit)) { return false; } if (stop_replaying_to_target) { return true; } // When we decide to create the debugger, we may end up // creating a checkpoint. In that case, we want the // checkpoint to retain the state it had *before* we started // replaying the next frame. Otherwise, the TraceIfstream // will be one frame ahead of its tracee tree. // // So we make the decision to create the debugger based on the // frame we're *about to* replay, without modifying the // TraceIfstream. // NB: we'll happily attach to whichever task within the // group happens to be scheduled here. We don't take // "attach to process" to mean "attach to thread-group // leader". return target_event_reached(timeline, target, result) && (!target.pid || t->tgid() == target.pid) && (!target.require_exec || t->execed()) && // Ensure we're at the start of processing an event. We don't // want to attach while we're finishing an exec() since that's a // slightly confusing state for ReplayTimeline's reverse execution. (!timeline.current_session().current_step_key().in_execution() || target_is_exit); } /** * The trace has reached the event at which the user wanted to start debugging. * Set up the appropriate state. */ void GdbServer::activate_debugger() { TraceFrame next_frame = timeline.current_session().current_trace_frame(); FrameTime event_now = next_frame.time(); Task* t = timeline.current_session().current_task(); if (target.event || target.pid) { if (stop_replaying_to_target) { fprintf(stderr, "\a\n" "--------------------------------------------------\n" " ---> Interrupted; attached to NON-TARGET process %d at event %llu.\n" "--------------------------------------------------\n", t->tgid(), (long long)event_now); } else if (target.event >= 0) { fprintf(stderr, "\a\n" "--------------------------------------------------\n" " ---> Reached target process %d at event %llu.\n" "--------------------------------------------------\n", t->tgid(), (long long)event_now); } else { ASSERT(t, target.event == -1); fprintf(stderr, "\a\n" "--------------------------------------------------\n" " ---> Reached exit of target process %d at event %llu.\n" "--------------------------------------------------\n", t->tgid(), (long long)event_now); exit_sigkill_pending = true; } } // Store the current tgid and event as the "execution target" // for the next replay session, if we end up restarting. This // allows us to determine if a later session has reached this // target without necessarily replaying up to this point. target.pid = t->tgid(); target.require_exec = false; target.event = event_now; last_query_tuid = last_continue_tuid = t->tuid(); // Have the "checkpoint" be the original replay // session, and then switch over to using the cloned // session. The cloned tasks will look like children // of the clonees, so this scheme prevents |pstree| // output from getting /too/ far out of whack. 
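  // For example (pid and event numbers hypothetical): if we just attached to
  // process 1234 at event 5000, then from here on target == { pid: 1234,
  // require_exec: false, event: 5000 }, and a later debugger "run" that
  // spawns a fresh session can stop at this same spot without replaying
  // beyond it.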
const char* where = "???"; if (timeline.can_add_checkpoint()) { debugger_restart_checkpoint = Checkpoint(timeline, last_continue_tuid, Checkpoint::EXPLICIT, where); } else { debugger_restart_checkpoint = Checkpoint(timeline, last_continue_tuid, Checkpoint::NOT_EXPLICIT, where); } } void GdbServer::restart_session(const GdbRequest& req) { DEBUG_ASSERT(req.type == DREQ_RESTART); DEBUG_ASSERT(dbg); in_debuggee_end_state = false; timeline.remove_breakpoints_and_watchpoints(); Checkpoint checkpoint_to_restore; if (req.restart().type == RESTART_FROM_CHECKPOINT) { auto it = checkpoints.find(req.restart().param); if (it == checkpoints.end()) { cout << "Checkpoint " << req.restart().param_str << " not found.\n"; cout << "Valid checkpoints:"; for (auto& c : checkpoints) { cout << " " << c.first; } cout << "\n"; dbg->notify_restart_failed(); return; } checkpoint_to_restore = it->second; } else if (req.restart().type == RESTART_FROM_PREVIOUS) { checkpoint_to_restore = debugger_restart_checkpoint; } else if (req.restart().type == RESTART_FROM_TICKS) { Ticks target = req.restart().param; ReplaySession &session = timeline.current_session(); Task* task = session.current_task(); FrameTime current_time = session.current_frame_time(); TraceReader tmp_reader(session.trace_reader()); FrameTime last_time = current_time; if (session.ticks_at_start_of_current_event() > target) { tmp_reader.rewind(); FrameTime task_time; // EXEC and CLONE reset the ticks counter. Find the first event // where the tuid matches our current task. // We'll always hit at least one CLONE/EXEC event for a task // (we can't debug the time before the initial exec) // but set this to 0 anyway to silence compiler warnings. FrameTime ticks_start_time = 0; while (true) { TraceTaskEvent r = tmp_reader.read_task_event(&task_time); if (task_time >= current_time) { break; } if (r.type() == TraceTaskEvent::CLONE || r.type() == TraceTaskEvent::EXEC) { if (r.tid() == task->tuid().tid()) { ticks_start_time = task_time; } } } // Forward the frame reader to the current event last_time = ticks_start_time + 1; while (true) { TraceFrame frame = tmp_reader.read_frame(); if (frame.time() >= ticks_start_time) { break; } } } while (true) { if (tmp_reader.at_end()) { cout << "No event found matching specified ticks target.\n"; dbg->notify_restart_failed(); return; } TraceFrame frame = tmp_reader.read_frame(); if (frame.tid() == task->tuid().tid() && frame.ticks() >= target) { break; } last_time = frame.time() + 1; } timeline.seek_to_ticks(last_time, target); } interrupt_pending = true; if (checkpoint_to_restore.mark) { timeline.seek_to_mark(checkpoint_to_restore.mark); last_query_tuid = last_continue_tuid = checkpoint_to_restore.last_continue_tuid; if (debugger_restart_checkpoint.is_explicit == Checkpoint::EXPLICIT) { timeline.remove_explicit_checkpoint(debugger_restart_checkpoint.mark); } debugger_restart_checkpoint = checkpoint_to_restore; if (timeline.can_add_checkpoint()) { timeline.add_explicit_checkpoint(); } return; } stop_replaying_to_target = false; if (req.restart().type == RESTART_FROM_EVENT) { // Note that we don't reset the target pid; we intentionally keep targeting // the same process no matter what is running when we hit the event. target.event = req.restart().param; target.event = min(final_event - 1, target.event); timeline.seek_to_before_event(target.event); ReplayResult result; do { result = timeline.replay_step_forward(RUN_CONTINUE); // We should never reach the end of the trace without hitting the stop // condition below. 
DEBUG_ASSERT(result.status != REPLAY_EXITED); if (is_last_thread_exit(result.break_status) && result.break_status.task_context.thread_group->tgid == target.pid) { // Debuggee task is about to exit. Stop here. in_debuggee_end_state = true; break; } } while (!at_target(result)); } activate_debugger(); } static uint32_t get_cpu_features(SupportedArch arch) { uint32_t cpu_features; switch (arch) { case x86: case x86_64: { cpu_features = arch == x86_64 ? GdbConnection::CPU_X86_64 : 0; unsigned int AVX_cpuid_flags = AVX_FEATURE_FLAG | OSXSAVE_FEATURE_FLAG; auto cpuid_data = cpuid(CPUID_GETEXTENDEDFEATURES, 0); if ((cpuid_data.ecx & PKU_FEATURE_FLAG) == PKU_FEATURE_FLAG) { // PKU (Skylake) implies AVX (Sandy Bridge). cpu_features |= GdbConnection::CPU_AVX | GdbConnection::CPU_PKU; break; } cpuid_data = cpuid(CPUID_GETFEATURES, 0); // We're assuming here that AVX support on the system making the recording // is the same as the AVX support during replay. But if that's not true, // rr is totally broken anyway. if ((cpuid_data.ecx & AVX_cpuid_flags) == AVX_cpuid_flags) { cpu_features |= GdbConnection::CPU_AVX; } break; } case aarch64: cpu_features = GdbConnection::CPU_AARCH64; break; default: FATAL() << "Unknown architecture"; return 0; } return cpu_features; } struct DebuggerParams { char exe_image[PATH_MAX]; char host[16]; // INET_ADDRSTRLEN, omitted for header churn short port; }; static void push_default_gdb_options(vector& vec, bool serve_files) { // The gdb protocol uses the "vRun" packet to reload // remote targets. The packet is specified to be like // "vCont", in which gdb waits infinitely long for a // stop reply packet. But in practice, gdb client // expects the vRun to complete within the remote-reply // timeout, after which it issues vCont. The timeout // causes gdb<-->rr communication to go haywire. // // rr can take a very long time indeed to send the // stop-reply to gdb after restarting replay; the time // to reach a specified execution target is // theoretically unbounded. Timing out on vRun is // technically a gdb bug, but because the rr replay and // the gdb reload models don't quite match up, we'll // work around it on the rr side by disabling the // remote-reply timeout. vec.push_back("-l"); vec.push_back("10000"); if (!serve_files) { // For now, avoid requesting binary files through vFile. That is slow and // hard to make work correctly, because gdb requests files based on the // names it sees in memory and in ELF, and those names may be symlinks to // the filenames in the trace, so it's hard to match those names to files in // the trace. vec.push_back("-ex"); vec.push_back("set sysroot /"); } } static void push_target_remote_cmd(vector& vec, const string& host, unsigned short port) { vec.push_back("-ex"); stringstream ss; // If we omit the address, then gdb can try to resolve "localhost" which // in some broken environments may not actually resolve to the local host ss << "target extended-remote " << host << ":" << port; vec.push_back(ss.str()); } /** * Wait for exactly one gdb host to connect to this remote target on * the specified IP address |host|, port |port|. If |probe| is nonzero, * a unique port based on |start_port| will be searched for. Otherwise, * if |port| is already bound, this function will fail. * * Pass the |tgid| of the task on which this debug-connection request * is being made. The remaining debugging session will be limited to * traffic regarding |tgid|, but clients don't need to and shouldn't * need to assume that. 
* * If we're opening this connection on behalf of a known client, pass * an fd in |client_params_fd|; we'll write the allocated port and |exe_image| * through the fd before waiting for a connection. |exe_image| is the * process that will be debugged by client, or null ptr if there isn't * a client. * * This function is infallible: either it will return a valid * debugging context, or it won't return. */ static unique_ptr await_connection( Task* t, ScopedFd& listen_fd, const GdbConnection::Features& features) { auto dbg = unique_ptr(new GdbConnection(t->tgid(), features)); dbg->set_cpu_features(get_cpu_features(t->arch())); dbg->await_debugger(listen_fd); return dbg; } static void print_debugger_launch_command(Task* t, const string& host, unsigned short port, bool serve_files, const char* debugger_name, FILE* out) { vector options; push_default_gdb_options(options, serve_files); push_target_remote_cmd(options, host, port); fprintf(out, "%s ", debugger_name); for (auto& opt : options) { fprintf(out, "'%s' ", opt.c_str()); } fprintf(out, "%s\n", t->vm()->exe_image().c_str()); } void GdbServer::serve_replay(const ConnectionFlags& flags) { ReplayResult result; do { result = timeline.replay_step_forward(RUN_CONTINUE); if (result.status == REPLAY_EXITED) { LOG(info) << "Debugger was not launched before end of trace"; return; } } while (!at_target(result)); unsigned short port = flags.dbg_port > 0 ? flags.dbg_port : getpid(); // Don't probe if the user specified a port. Explicitly // selecting a port is usually done by scripts, which would // presumably break if a different port were to be selected by // rr (otherwise why would they specify a port in the first // place). So fail with a clearer error message. auto probe = flags.dbg_port > 0 ? DONT_PROBE : PROBE_PORT; Task* t = timeline.current_session().current_task(); ScopedFd listen_fd = open_socket(flags.dbg_host.c_str(), &port, probe); if (flags.debugger_params_write_pipe) { DebuggerParams params; memset(¶ms, 0, sizeof(params)); strncpy(params.exe_image, t->vm()->exe_image().c_str(), sizeof(params.exe_image) - 1); strncpy(params.host, flags.dbg_host.c_str(), sizeof(params.host) - 1); params.port = port; ssize_t nwritten = write(*flags.debugger_params_write_pipe, ¶ms, sizeof(params)); DEBUG_ASSERT(nwritten == sizeof(params)); } else { fputs("Launch gdb with\n ", stderr); print_debugger_launch_command(t, flags.dbg_host, port, flags.serve_files, flags.debugger_name.c_str(), stderr); } if (flags.debugger_params_write_pipe) { flags.debugger_params_write_pipe->close(); } debuggee_tguid = t->thread_group()->tguid(); FrameTime first_run_event = std::max(t->vm()->first_run_event(), t->thread_group()->first_run_event()); if (first_run_event) { timeline.set_reverse_execution_barrier_event(first_run_event); } do { LOG(debug) << "initializing debugger connection"; dbg = await_connection(t, listen_fd, GdbConnection::Features()); activate_debugger(); GdbRequest last_resume_request; while (debug_one_step(last_resume_request) == CONTINUE_DEBUGGING) { } timeline.remove_breakpoints_and_watchpoints(); } while (flags.keep_listening); LOG(debug) << "debugger server exiting ..."; } static string create_gdb_command_file(const string& macros) { TempFile file = create_temporary_file("rr-gdb-commands-XXXXXX"); // This fd is just leaked. That's fine since we only call this once // per rr invocation at the moment. 
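  // The trick used below: keep an fd to an already-unlinked temp file open
  // and hand gdb a path of the form "/proc/<rr's pid>/fd/<fd>", which stays
  // readable for as long as the fd is open, e.g. (pid and fd hypothetical):
  //   /proc/12345/fd/7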
int fd = file.fd.extract(); unlink(file.name.c_str()); ssize_t len = macros.size(); int written = write(fd, macros.c_str(), len); if (written != len) { FATAL() << "Failed to write gdb command file"; } stringstream procfile; procfile << "/proc/" << getpid() << "/fd/" << fd; return procfile.str(); } static string to_string(const vector& args) { stringstream ss; for (auto& a : args) { ss << "'" << a << "' "; } return ss.str(); } static bool needs_target(const string& option) { return !strncmp(option.c_str(), "continue", option.size()); } /** * Exec gdb using the params that were written to * |params_pipe_fd|. Optionally, pre-define in the gdb client the set * of macros defined in |macros| if nonnull. */ void GdbServer::launch_gdb(ScopedFd& params_pipe_fd, const string& gdb_binary_file_path, const vector& gdb_options, bool serve_files) { auto macros = gdb_rr_macros(); string gdb_command_file = create_gdb_command_file(macros); DebuggerParams params; ssize_t nread; while (true) { nread = read(params_pipe_fd, ¶ms, sizeof(params)); if (nread == 0) { // pipe was closed. Probably rr failed/died. return; } if (nread != -1 || errno != EINTR) { break; } } DEBUG_ASSERT(nread == sizeof(params)); vector args; args.push_back(gdb_binary_file_path); push_default_gdb_options(args, serve_files); args.push_back("-x"); args.push_back(gdb_command_file); bool did_set_remote = false; for (size_t i = 0; i < gdb_options.size(); ++i) { if (!did_set_remote && gdb_options[i] == "-ex" && i + 1 < gdb_options.size() && needs_target(gdb_options[i + 1])) { push_target_remote_cmd(args, string(params.host), params.port); did_set_remote = true; } args.push_back(gdb_options[i]); } if (!did_set_remote) { push_target_remote_cmd(args, string(params.host), params.port); } args.push_back(params.exe_image); vector env = current_env(); env.push_back("GDB_UNDER_RR=1"); LOG(debug) << "launching " << to_string(args); StringVectorToCharArray c_args(args); StringVectorToCharArray c_env(env); execvpe(gdb_binary_file_path.c_str(), c_args.get(), c_env.get()); CLEAN_FATAL() << "Failed to exec " << gdb_binary_file_path << "."; } void GdbServer::emergency_debug(Task* t) { // See the comment in |guard_overshoot()| explaining why we do // this. Unlike in that context though, we don't know if |t| // overshot an internal breakpoint. If it did, cover that // breakpoint up. if (t->vm()) { t->vm()->remove_all_breakpoints(); } // Don't launch a debugger on fatal errors; the user is most // likely already in a debugger, and wouldn't be able to // control another session. Instead, launch a new GdbServer and wait for // the user to connect from another window. GdbConnection::Features features; // Don't advertise reverse_execution to gdb because a) it won't work and // b) some gdb versions will fail if the user doesn't turn off async // mode (and we don't want to require users to do that) features.reverse_execution = false; unsigned short port = t->tid; ScopedFd listen_fd = open_socket(localhost_addr.c_str(), &port, PROBE_PORT); dump_rr_stack(); char* test_monitor_pid = getenv("RUNNING_UNDER_TEST_MONITOR"); if (test_monitor_pid) { pid_t pid = atoi(test_monitor_pid); // Tell test-monitor to wake up and take a snapshot. It will also // connect the emergency debugger so let that happen. 
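  // For reference, the command printed by print_debugger_launch_command
  // below has this shape (the port and binary path are examples, not fixed
  // values):
  //   gdb '-l' '10000' '-ex' 'set sysroot /' \
  //       '-ex' 'target extended-remote 127.0.0.1:1234' /path/to/debuggee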
FILE* gdb_cmd = fopen("gdb_cmd", "w"); if (gdb_cmd) { print_debugger_launch_command(t, localhost_addr, port, false, "gdb", gdb_cmd); fclose(gdb_cmd); } kill(pid, SIGURG); } else { fputs("Launch gdb with\n ", stderr); print_debugger_launch_command(t, localhost_addr, port, false, "gdb", stderr); } unique_ptr dbg = await_connection(t, listen_fd, features); GdbServer(dbg, t).process_debugger_requests(); } string GdbServer::init_script() { return gdb_rr_macros(); } static ScopedFd generate_fake_proc_maps(Task* t) { TempFile file = create_temporary_file("rr-fake-proc-maps-XXXXXX"); unlink(file.name.c_str()); int fd = dup(file.fd); if (fd < 0) { FATAL() << "Cannot dup"; } FILE* f = fdopen(fd, "w"); int addr_min_width = word_size(t->arch()) == 8 ? 10 : 8; for (AddressSpace::Maps::iterator it = t->vm()->maps().begin(); it != t->vm()->maps().end(); ++it) { // If this is the mapping just before the rr page and it's still librrpage, // merge this mapping with the subsequent one. We'd like gdb to treat // librrpage as the vdso, but it'll only do so if the entire vdso is one // mapping. auto m = *it; uintptr_t map_end = (long long)m.recorded_map.end().as_int(); if (m.recorded_map.end() == t->vm()->rr_page_start()) { auto it2 = it; if (++it2 != t->vm()->maps().end()) { auto m2 = *it2; if (m2.flags & AddressSpace::Mapping::IS_RR_PAGE) { // Extend this mapping map_end += PRELOAD_LIBRARY_PAGE_SIZE; // Skip the rr page ++it; } } } int len = fprintf(f, "%0*llx-%0*llx %s%s%s%s %08llx %02x:%02x %lld", addr_min_width, (long long)m.recorded_map.start().as_int(), addr_min_width, (long long)map_end, (m.recorded_map.prot() & PROT_READ) ? "r" : "-", (m.recorded_map.prot() & PROT_WRITE) ? "w" : "-", (m.recorded_map.prot() & PROT_EXEC) ? "x" : "-", (m.recorded_map.flags() & MAP_SHARED) ? "s" : "p", (long long)m.recorded_map.file_offset_bytes(), major(m.recorded_map.device()), minor(m.recorded_map.device()), (long long)m.recorded_map.inode()); while (len < 72) { fputc(' ', f); ++len; } fputc(' ', f); string name; const string& fsname = m.recorded_map.fsname(); for (size_t i = 0; i < fsname.size(); ++i) { if (fsname[i] == '\n') { name.append("\\012"); } else { name.push_back(fsname[i]); } } fputs(name.c_str(), f); fputc('\n', f); } if (ferror(f) || fclose(f)) { FATAL() << "Can't write"; } return std::move(file.fd); } static bool is_ld_mapping(string map_name) { char ld_start[] = "ld-"; size_t matchpos = map_name.find_last_of('/'); string fname = map_name.substr(matchpos == string::npos ? 
0 : matchpos + 1); return memcmp(fname.c_str(), ld_start, sizeof(ld_start)-1) == 0; } static bool is_likely_interp(string fsname) { #ifdef __aarch64__ return fsname == "/lib/ld-linux-aarch64.so.1"; #else return fsname == "/lib64/ld-linux-x86-64.so.2" || fsname == "/lib/ld-linux.so.2"; #endif } static remote_ptr base_addr_from_rendezvous(Task* t, string fname) { remote_ptr interpreter_base = t->vm()->saved_interpreter_base(); if (!interpreter_base || !t->vm()->has_mapping(interpreter_base)) { return nullptr; } string ld_path = t->vm()->saved_ld_path(); if (ld_path.length() == 0) { FATAL() << "Failed to retrieve interpreter name with interpreter_base=" << interpreter_base; } ScopedFd ld(ld_path.c_str(), O_RDONLY); if (ld < 0) { FATAL() << "Open failed: " << ld_path; } ElfFileReader reader(ld); auto syms = reader.read_symbols(".dynsym", ".dynstr"); static const char r_debug[] = "_r_debug"; bool found = false; uintptr_t r_debug_offset = 0; for (size_t i = 0; i < syms.size(); ++i) { if (!syms.is_name(i, r_debug)) { continue; } r_debug_offset = syms.addr(i); found = true; } if (!found) { return nullptr; } bool ok = true; remote_ptr r_debug_remote = interpreter_base.as_int()+r_debug_offset; remote_ptr link_map = t->read_mem(REMOTE_PTR_FIELD(r_debug_remote, r_map), &ok); while (ok && link_map != nullptr) { if (fname == t->read_c_str(t->read_mem(REMOTE_PTR_FIELD(link_map, l_name), &ok), &ok)) { remote_ptr result = t->read_mem(REMOTE_PTR_FIELD(link_map, l_addr), &ok); return ok ? result : nullptr; } link_map = t->read_mem(REMOTE_PTR_FIELD(link_map, l_next), &ok); } return nullptr; } int GdbServer::open_file(Session& session, Task* continue_task, const std::string& file_name) { // XXX should we require file_scope_pid == 0 here? ScopedFd contents; if (file_name.empty()) { return -1; } LOG(debug) << "Trying to open " << file_name; if (file_name.substr(0, 6) == "/proc/") { char* tid_end; long tid = strtol(file_name.c_str() + 6, &tid_end, 10); if (*tid_end != '/') { return -1; } if (!strncmp(tid_end, "/task/", 6)) { tid = strtol(tid_end + 6, &tid_end, 10); if (*tid_end != '/') { return -1; } } if (tid != (pid_t)tid) { return -1; } Task* t = session.find_task(tid); if (!t) { return -1; } if (!strcmp(tid_end, "/maps")) { contents = generate_fake_proc_maps(t); } else { return -1; } } else if (file_name == continue_task->vm()->interp_name()) { remote_ptr interp_base = continue_task->vm()->interp_base(); auto m = continue_task->vm()->mapping_of(interp_base); LOG(debug) << "Found dynamic linker as memory mapping " << m.recorded_map; int ret_fd = 0; while (files.find(ret_fd) != files.end() || memory_files.find(ret_fd) != memory_files.end()) { ++ret_fd; } memory_files.insert(make_pair(ret_fd, FileId(m.recorded_map))); return ret_fd; } else { // See if we can find the file by serving one of our mappings std::string normalized_file_name = file_name; normalize_file_name(normalized_file_name); for (const auto& m : continue_task->vm()->maps()) { // The dynamic linker is generally a symlink that is resolved by the // kernel when the process image gets loaded. We add a special case to // substitute the correct mapping, so gdb can find the dynamic linker // rendezvous structures. // Use our old hack for ld from before we read PT_INTERP for backwards // compat with older traces. 
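      // Example of the mismatch handled here (recorded name hypothetical):
      // gdb may ask for "/lib64/ld-linux-x86-64.so.2" while the trace
      // recorded the interpreter under its resolved name, e.g.
      // "/usr/lib64/ld-2.31.so"; is_likely_interp() recognizes the former
      // and is_ld_mapping() the latter (an "ld-" basename), so the recorded
      // mapping is served for the requested name.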
if (m.recorded_map.fsname().compare(0, normalized_file_name.length(), normalized_file_name) == 0 || m.map.fsname().compare(0, normalized_file_name.length(), normalized_file_name) == 0 || (is_ld_mapping(m.recorded_map.fsname()) && is_likely_interp(normalized_file_name))) { int ret_fd = 0; while (files.find(ret_fd) != files.end() || memory_files.find(ret_fd) != memory_files.end()) { ++ret_fd; } LOG(debug) << "Found as memory mapping " << m.recorded_map; memory_files.insert(make_pair(ret_fd, FileId(m.recorded_map))); return ret_fd; } } // Last ditch attempt: Dig through the tracee's libc rendezvous struct to // see if we can find this file by a different name (e.g. if it was opened // via symlink) remote_ptr base = base_addr_from_rendezvous(continue_task, file_name); if (base != nullptr && continue_task->vm()->has_mapping(base)) { int ret_fd = 0; while (files.find(ret_fd) != files.end() || memory_files.find(ret_fd) != memory_files.end()) { ++ret_fd; } memory_files.insert(make_pair(ret_fd, FileId(continue_task->vm()->mapping_of(base).recorded_map))); return ret_fd; } LOG(debug) << "... not found"; return -1; } int ret_fd = 0; while (files.find(ret_fd) != files.end()) { ++ret_fd; } files.insert(make_pair(ret_fd, std::move(contents))); return ret_fd; } } // namespace rr rr-5.7.0/src/GdbServer.h000066400000000000000000000251311450675474200150110ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_SERVER_H_ #define RR_GDB_SERVER_H_ #include #include #include #include "DiversionSession.h" #include "GdbConnection.h" #include "ReplaySession.h" #include "ReplayTimeline.h" #include "ScopedFd.h" #ifdef PROC_SERVICE_H #include "ThreadDb.h" #endif #include "TraceFrame.h" namespace rr { static std::string localhost_addr = "127.0.0.1"; class GdbServer { // Not ideal but we can't inherit friend from GdbCommand friend std::string invoke_checkpoint(GdbServer&, Task*, const std::vector&); friend std::string invoke_delete_checkpoint(GdbServer&, Task*, const std::vector&); friend std::string invoke_info_checkpoints(GdbServer&, Task*, const std::vector&); public: struct Target { Target() : pid(0), require_exec(false), event(0) {} // Target process to debug, or 0 to just debug the first process pid_t pid; // If true, wait for the target process to exec() before attaching debugger bool require_exec; // Wait until at least 'event' has elapsed before attaching FrameTime event; }; struct ConnectionFlags { // -1 to let GdbServer choose the port, a positive integer to select a // specific port to listen on. If keep_listening is on, wait for another // debugger connection after the first one is terminated. int dbg_port; std::string dbg_host; bool keep_listening; bool serve_files; // If non-null, then when the gdbserver is set up, we write its connection // parameters through this pipe. GdbServer::launch_gdb is passed the // other end of this pipe to exec gdb with the parameters. ScopedFd* debugger_params_write_pipe; // Name of the debugger to suggest. Only used if debugger_params_write_pipe // is null. std::string debugger_name; ConnectionFlags() : dbg_port(-1), dbg_host(localhost_addr), keep_listening(false), serve_files(false), debugger_params_write_pipe(nullptr) {} }; /** * Create a gdbserver serving the replay of 'session'. 
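   *
   * A minimal usage sketch (simplified; rr's real callers set up the
   * session, target and flags first):
   *
   *   GdbServer server(session, target);
   *   GdbServer::ConnectionFlags flags;
   *   server.serve_replay(flags);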
*/ GdbServer(std::shared_ptr session, const Target& target) : target(target), final_event(UINT32_MAX), in_debuggee_end_state(false), stop_replaying_to_target(false), interrupt_pending(false), exit_sigkill_pending(false), timeline(std::move(session)), emergency_debug_session(nullptr) { memset(&stop_siginfo, 0, sizeof(stop_siginfo)); } /** * Actually run the server. Returns only when the debugger disconnects. */ void serve_replay(const ConnectionFlags& flags); /** * exec()'s gdb using parameters read from params_pipe_fd (and sent through * the pipe passed to serve_replay_with_debugger). */ static void launch_gdb(ScopedFd& params_pipe_fd, const std::string& gdb_binary_file_path, const std::vector& gdb_options, bool serve_files); /** * Start a debugging connection for |t| and return when there are no * more requests to process (usually because the debugger detaches). * * This helper doesn't attempt to determine whether blocking rr on a * debugger connection might be a bad idea. It will always open the debug * socket and block awaiting a connection. */ static void emergency_debug(Task* t); /** * A string containing the default gdbinit script that we load into gdb. */ static std::string init_script(); /** * Called from a signal handler (or other thread) during serve_replay, * this will cause the replay-to-target phase to be interrupted and * debugging started wherever the replay happens to be. */ void interrupt_replay_to_target() { stop_replaying_to_target = true; } /** * Return the register |which|, which may not have a defined value. */ static GdbRegisterValue get_reg(const Registers& regs, const ExtraRegisters& extra_regs, GdbRegister which); ReplayTimeline& get_timeline() { return timeline; } private: GdbServer(std::unique_ptr& dbg, Task* t); Session& current_session() { return timeline.is_running() ? timeline.current_session() : *emergency_debug_session; } void dispatch_regs_request(const Registers& regs, const ExtraRegisters& extra_regs); enum ReportState { REPORT_NORMAL, REPORT_THREADS_DEAD }; void maybe_intercept_mem_request(Task* target, const GdbRequest& req, std::vector* result); /** * Process the single debugger request |req| inside the session |session|. * * Callers should implement any special semantics they want for * particular debugger requests before calling this helper, to do * generic processing. */ void dispatch_debugger_request(Session& session, const GdbRequest& req, ReportState state); bool at_target(ReplayResult& result); void activate_debugger(); void restart_session(const GdbRequest& req); GdbRequest process_debugger_requests(ReportState state = REPORT_NORMAL); enum ContinueOrStop { CONTINUE_DEBUGGING, STOP_DEBUGGING }; bool detach_or_restart(const GdbRequest& req, ContinueOrStop* s); ContinueOrStop handle_exited_state(GdbRequest& last_resume_request); ContinueOrStop debug_one_step(GdbRequest& last_resume_request); /** * If 'req' is a reverse-singlestep, try to obtain the resulting state * directly from ReplayTimeline's mark database. If that succeeds, * report the singlestep break status to gdb and process any get-registers * requests. Repeat until we get a request that isn't reverse-singlestep * or get-registers, returning that request in 'req'. * During reverse-next commands, gdb tends to issue a series of * reverse-singlestep/get-registers pairs, and this makes those much * more efficient by avoiding having to actually reverse-singlestep the * session. 
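   *
   * For example, a single gdb "reverse-next" typically arrives as a sequence
   * like bs, g, bs, g, ... (bs = reverse-singlestep, g = read registers),
   * each of which this fast path can answer from the mark database without
   * running the session backwards.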
*/ void try_lazy_reverse_singlesteps(GdbRequest& req); /** * Process debugger requests made in |diversion_session| until action needs * to be taken by the caller (a resume-execution request is received). * The received request is returned through |req|. * Returns true if diversion should continue, false if it should end. */ bool diverter_process_debugger_requests(DiversionSession& diversion_session, uint32_t& diversion_refcount, GdbRequest* req); /** * Create a new diversion session using |replay| session as the * template. The |replay| session isn't mutated. * * Execution begins in the new diversion session under the control of * |dbg| starting with initial thread target |task|. The diversion * session ends at the request of |dbg|, and |divert| returns the first * request made that wasn't handled by the diversion session. That * is, the first request that should be handled by |replay| upon * resuming execution in that session. */ GdbRequest divert(ReplaySession& replay); /** * If |break_status| indicates a stop that we should report to gdb, * report it. |req| is the resume request that generated the stop. */ void maybe_notify_stop(const GdbRequest& req, const BreakStatus& break_status); /** * Return the checkpoint stored as |checkpoint_id| or nullptr if there * isn't one. */ ReplaySession::shr_ptr get_checkpoint(int checkpoint_id); /** * Delete the checkpoint stored as |checkpoint_id| if it exists, or do * nothing if it doesn't exist. */ void delete_checkpoint(int checkpoint_id); /** * Handle GDB file open requests. If we can serve this read request, add * an entry to `files` with the file contents and return our internal * file descriptor. */ int open_file(Session& session, Task *continue_task, const std::string& file_name); Target target; // dbg is initially null. Once the debugger connection is established, it // never changes. std::unique_ptr dbg; // When dbg is non-null, the ThreadGroupUid of the task being debugged. Never // changes once the connection is established --- we don't currently // support switching gdb between debuggee processes. ThreadGroupUid debuggee_tguid; // ThreadDb for debuggee ThreadGroup #ifdef PROC_SERVICE_H std::unique_ptr thread_db; #endif // The TaskUid of the last continued task. TaskUid last_continue_tuid; // The TaskUid of the last queried task. TaskUid last_query_tuid; FrameTime final_event; // siginfo for last notified stop. siginfo_t stop_siginfo; bool in_debuggee_end_state; // True when the user has interrupted replaying to a target event. volatile bool stop_replaying_to_target; // True when a DREQ_INTERRUPT has been received but not handled, or when // we've restarted and want the first continue to be interrupted immediately. bool interrupt_pending; // True when a user has run to exit before attaching the debugger. bool exit_sigkill_pending; ReplayTimeline timeline; Session* emergency_debug_session; struct Checkpoint { enum Explicit { EXPLICIT, NOT_EXPLICIT }; Checkpoint(ReplayTimeline& timeline, TaskUid last_continue_tuid, Explicit e, const std::string& where) : last_continue_tuid(last_continue_tuid), is_explicit(e), where(where) { if (e == EXPLICIT) { mark = timeline.add_explicit_checkpoint(); } else { mark = timeline.mark(); } } Checkpoint() : is_explicit(NOT_EXPLICIT) {} ReplayTimeline::Mark mark; TaskUid last_continue_tuid; Explicit is_explicit; std::string where; }; // |debugger_restart_mark| is the point where we will restart from with // a no-op debugger "run" command. 
  Checkpoint debugger_restart_checkpoint;

  // gdb checkpoints, indexed by ID
  std::map<int, Checkpoint> checkpoints;

  // Set of symbols to look up, for qSymbol.
  std::set<std::string> symbols;
  // Iterator into |symbols|.
  std::set<std::string>::iterator symbols_iter;

  // Contents of opened files. Maps our internal file descriptor to a real
  // file descriptor. Exposing our real file descriptor values is probably a
  // bad idea.
  std::map<int, ScopedFd> files;
  std::map<int, FileId> memory_files;
  // The pid for gdb's last vFile:setfs
  pid_t file_scope_pid;
};

} // namespace rr

#endif /* RR_GDB_SERVER_H_ */

rr-5.7.0/src/HasTaskSet.cc
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "HasTaskSet.h"

#include "Task.h"
#include "log.h"

namespace rr {

void HasTaskSet::insert_task(Task* t) {
  LOG(debug) << "adding " << t->tid << " to task set " << this;
  tasks.insert(t);
}

void HasTaskSet::erase_task(Task* t) {
  LOG(debug) << "removing " << t->tid << " from task set " << this;
  tasks.erase(t);
}

Task* HasTaskSet::first_running_task() const {
  for (Task* t : task_set()) {
    if (!t->already_exited() && !t->seen_ptrace_exit_event()) {
      return t;
    }
  }
  return nullptr;
}

Task* HasTaskSet::find_other_thread_group(Task* t) const {
  for (Task* tt : task_set()) {
    if (tt->thread_group() != t->thread_group()) {
      return tt;
    }
  }
  return nullptr;
}

} // namespace rr

rr-5.7.0/src/HasTaskSet.h
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_HASTASKSET_H_
#define RR_HASTASKSET_H_

#include <set>

namespace rr {

class Task;

/**
 * Base class for classes that manage a set of Tasks.
 */
class HasTaskSet {
public:
  typedef std::set<Task*> TaskSet;
  const TaskSet& task_set() const { return tasks; }

  virtual void insert_task(Task* t);
  virtual void erase_task(Task* t);
  bool has_task(Task* t) const { return tasks.find(t) != tasks.end(); }
  Task* find_other_thread_group(Task* t) const;
  Task* first_running_task() const;

protected:
  TaskSet tasks;
};

} // namespace rr

#endif /* RR_HASTASKSET_H_ */

rr-5.7.0/src/HelpCommand.cc
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "Command.h"
#include "main.h"

using namespace std;

namespace rr {

class HelpCommand : public Command {
public:
  virtual int run(std::vector<std::string>& args) override;

protected:
  HelpCommand(const char* name, const char* help) : Command(name, help) {}

  static HelpCommand help1;
  static HelpCommand help2;
  static HelpCommand help3;
};

HelpCommand HelpCommand::help1("help", " rr help [command]\n");
HelpCommand HelpCommand::help2("-h", nullptr);
HelpCommand HelpCommand::help3("--help", nullptr);

int HelpCommand::run(std::vector<std::string>& args) {
  if (args.size() == 0) {
    print_usage(stdout);
  }
  Command* command = Command::command_for_name(args[0]);
  if (!command) {
    print_usage(stderr);
  }
  command->print_help(stdout);
  return 0;
}

} // namespace rr

rr-5.7.0/src/LsCommand.cc
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include <assert.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <sstream>

#include "Command.h"
#include "main.h"
#include "TraceStream.h"
#include "util.h"

using namespace std;

namespace rr {

class LsCommand : public Command {
public:
  virtual int run(vector<string>& args);

protected:
  LsCommand(const char* name, const char* help) :
Command(name, help) {} static LsCommand singleton; }; LsCommand LsCommand::singleton( "ls", " rr ls [OPTION]...\n" " -l, --long-listing use a long listing format\n" " (trace name | start time | size | command line)\n" " -t, --sort-by-age, sort from newest to oldest\n" " -r, --reverse, the sort order\n"); struct LsFlags { bool reverse; bool full_listing; bool sort_by_time; LsFlags() : reverse(false), full_listing(false), sort_by_time(false) {} }; static bool parse_ls_arg(vector& args, LsFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 'r', "reverse", NO_PARAMETER }, { 'l', "long-listing", NO_PARAMETER }, { 't', "sort-by-age", NO_PARAMETER } }; ParsedOption opt; if (!Command::parse_option(args, options, &opt)) { return false; } switch (opt.short_name) { case 'r': flags.reverse = true; break; case 'l': flags.full_listing = true; break; case 't': flags.sort_by_time = true; break; default: assert(0 && "Unknown option"); } return true; } struct TraceInfo { string name; struct timespec ctime; string exit; TraceInfo(string in_name) : name(in_name) {} }; static bool compare_by_name(const TraceInfo& at, const TraceInfo& bt) { auto a = at.name; auto b = bt.name; return lexicographical_compare(begin(a), end(a), begin(b), end(b)); } static bool get_folder_size(string dir_name, string& size_str) { DIR* dir = opendir(dir_name.c_str()); if (!dir) { cerr << "Cannot open " << dir_name << endl; return false; } size_t bytes = 0; while (struct dirent* ent = readdir(dir)) { string path = dir_name + "/" + ent->d_name; struct stat st; if (stat(path.c_str(), &st) == -1) { cerr << "stat " << path << " failed\n"; return false; } bytes += st.st_size; } closedir(dir); static const char suffixes[] = " KMGT"; double size = bytes; size_t suffix_idx = 0; while (size >= 1000.0) { size /= 1024.0; suffix_idx++; } char suffix = suffixes[suffix_idx]; ostringstream cvt; if (suffix == ' ') { cvt << bytes; } else if (size >= 10) { cvt << int(size) << suffix; } else { cvt << fixed << setprecision(1) << size << suffix; } size_str = cvt.str(); return true; } static string get_exec_path(TraceReader& reader) { while (true) { TraceTaskEvent r = reader.read_task_event(); if (r.type() == TraceTaskEvent::NONE) { break; } if (r.type() == TraceTaskEvent::EXEC) { return r.cmd_line()[0]; } } return string(); } string find_exit_code(pid_t pid, const vector& events, size_t current_event, const map current_tid_to_pid); static int ls(const string& traces_dir, const LsFlags& flags, FILE* out) { DIR* dir = opendir(traces_dir.c_str()); if (!dir) { fprintf(stderr, "Cannot open %s\n", traces_dir.c_str()); return 1; } const string cpu_lock = real_path(get_cpu_lock_file()); const string full_traces_dir = real_path(traces_dir) + "/"; vector traces; while (struct dirent* trace_dir = readdir(dir)) { if (full_traces_dir + trace_dir->d_name == cpu_lock) { continue; } if (!is_valid_trace_name(trace_dir->d_name)) { continue; } traces.emplace_back(TraceInfo(string(trace_dir->d_name))); if (flags.sort_by_time || flags.full_listing) { struct stat st; stat((traces_dir + "/" + trace_dir->d_name + "/data").c_str(), &st); traces.back().ctime = st.st_ctim; } if (flags.full_listing) { TraceReader trace(traces_dir + "/" + trace_dir->d_name); vector events; while (true) { TraceTaskEvent r = trace.read_task_event(); if (r.type() == TraceTaskEvent::NONE) { break; } events.push_back(r); } if (events.empty() || events[0].type() != TraceTaskEvent::EXEC) { traces.back().exit = "????"; continue; } map tid_to_pid; pid_t 
initial_tid = events[0].tid(); tid_to_pid[initial_tid] = initial_tid; traces.back().exit = find_exit_code(initial_tid, events, 0, tid_to_pid); } } closedir(dir); if (flags.sort_by_time) { auto compare_by_time = [&](const TraceInfo& at, const TraceInfo& bt) -> bool { if (at.ctime.tv_sec == bt.ctime.tv_sec) { return at.ctime.tv_nsec < bt.ctime.tv_nsec; } return at.ctime.tv_sec < bt.ctime.tv_sec; }; sort(traces.begin(), traces.end(), compare_by_time); } else { sort(traces.begin(), traces.end(), compare_by_name); } if (flags.reverse) { reverse(begin(traces), end(traces)); }; if (!flags.full_listing) { for (TraceInfo& t : traces) { cout << t.name << "\n"; } return 0; } int max_name_size = accumulate(traces.begin(), traces.end(), 0, [](int m, TraceInfo& t) { return max(m, static_cast(t.name.length())); }); fprintf(out, "%-*s %-19s %5s %6s %s\n", max_name_size, "NAME", "WHEN", "SIZE", "EXIT", "CMD"); for (TraceInfo& t : traces) { // Record date & runtime estimates string data_file = traces_dir + "/" + t.name + "/data"; char outstr[200]; struct tm ctime_tm; if (localtime_r(&t.ctime.tv_sec, &ctime_tm)) { strftime(outstr, sizeof(outstr), "%F %T", &ctime_tm); } else { strcpy(outstr, ""); } string folder_size = "????"; string exe = "(incomplete)"; string version_file = traces_dir + "/" + t.name + "/version"; struct stat st; if (stat(version_file.c_str(), &st) != -1) { TraceReader reader(traces_dir + "/" + t.name); get_folder_size(reader.dir(), folder_size); exe = get_exec_path(reader); } fprintf(out, "%-*s %s %5s %6s %s\n", max_name_size, t.name.c_str(), outstr, folder_size.c_str(), t.exit.c_str(), exe.c_str()); } return 0; } int LsCommand::run(vector& args) { bool found_dir = false; string trace_dir; LsFlags flags; while (!args.empty()) { if (parse_ls_arg(args, flags)) { continue; } if (!found_dir && parse_optional_trace_dir(args, &trace_dir)) { found_dir = true; continue; } print_help(stderr); return 1; }; if (!found_dir) { trace_dir = trace_save_dir(); } return ls(trace_dir, flags, stdout); }; } // namespace rr rr-5.7.0/src/MagicSaveDataMonitor.cc000066400000000000000000000052641450675474200172720ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "MagicSaveDataMonitor.h" #include #include #include "RecordTask.h" #include "ReplayTask.h" #include "Session.h" #include "log.h" #include "util.h" namespace rr { static void dump_path_data(Task* t, FrameTime global_time, const char* tag, char* filename, size_t filename_size, const void* buf, size_t buf_len, remote_ptr addr) { format_dump_filename(t, global_time, tag, filename, filename_size); dump_binary_data(filename, tag, (const uint32_t*)buf, buf_len / 4, addr); } static void notify_save_data_error(ReplayTask* t, remote_ptr addr, const void* rec_buf, size_t rec_buf_len, const void* rep_buf, size_t rep_buf_len) { char rec_dump[PATH_MAX]; char rep_dump[PATH_MAX]; FrameTime global_time = t->current_trace_frame().time(); dump_path_data(t, global_time, "rec_save_data", rec_dump, sizeof(rec_dump), rec_buf, rec_buf_len, addr); dump_path_data(t, global_time, "rep_save_data", rep_dump, sizeof(rep_dump), rep_buf, rep_buf_len, addr); ASSERT(t, (rec_buf_len == rep_buf_len && !memcmp(rec_buf, rep_buf, rec_buf_len))) << "Divergence in contents of 'tracee-save buffer'. 
Recording executed\n" "\n" " write(" << RR_MAGIC_SAVE_DATA_FD << ", " << addr << ", " << rec_buf_len << ")\n" "\n" "and replay executed\n" "\n" " write(" << RR_MAGIC_SAVE_DATA_FD << ", " << addr << ", " << rep_buf_len << ")\n" "\n" "The contents of the tracee-save buffers have been dumped to disk.\n" "Compare them by using the following command\n" "\n" "$ diff -u " << rec_dump << " " << rep_dump << " >save-data-diverge.diff\n"; } void MagicSaveDataMonitor::did_write(Task* t, const std::vector& ranges, LazyOffset&) { for (auto& r : ranges) { if (t->session().is_recording()) { static_cast(t)->record_remote(r.data.cast(), r.length); } else if (t->session().is_replaying()) { auto rt = static_cast(t); auto bytes = rt->read_mem(r.data.cast(), r.length); auto rec = rt->trace_reader().read_raw_data(); if (rec.data != bytes) { notify_save_data_error(rt, rec.addr, rec.data.data(), rec.data.size(), bytes.data(), bytes.size()); } } } } } // namespace rr rr-5.7.0/src/MagicSaveDataMonitor.h000066400000000000000000000011321450675474200171220ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MAGIC_SAVE_DATA_MONITOR_H_ #define RR_MAGIC_SAVE_DATA_MONITOR_H_ #include "FileMonitor.h" namespace rr { /** * A FileMonitor to track writes to RR_MAGIC_SAVE_DATA_FD. */ class MagicSaveDataMonitor : public FileMonitor { public: MagicSaveDataMonitor() {} virtual Type type() override { return MagicSaveData; } virtual void did_write(Task* t, const std::vector& ranges, LazyOffset& offset) override; }; } // namespace rr #endif /* RR_MAGIC_SAVE_DATA_MONITOR_H_ */ rr-5.7.0/src/MemoryRange.h000066400000000000000000000042421450675474200153530ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MEMORY_RANGE_H_ #define RR_MEMORY_RANGE_H_ #include "core.h" #include "log.h" #include "remote_ptr.h" namespace rr { /** * Range of memory addresses that can be used as a std::map key. */ class MemoryRange { public: MemoryRange() {} MemoryRange(remote_ptr addr, size_t num_bytes) : start_(addr), end_(addr + num_bytes) { DEBUG_ASSERT(start_ <= end_); } MemoryRange(remote_ptr addr, remote_ptr end) : start_(addr), end_(end) { DEBUG_ASSERT(start_ <= end); } MemoryRange(const MemoryRange&) = default; MemoryRange& operator=(const MemoryRange&) = default; bool operator==(const MemoryRange& o) const { return start_ == o.start_ && end_ == o.end_; } bool operator<(const MemoryRange& o) const { return start_ != o.start_ ? start_ < o.start_ : end_ < o.end_; } /** * Return true iff |o| is an address range fully contained by * this. 
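   * For example, [0x1000, 0x2000) contains [0x1400, 0x1800) and contains
   * itself, but does not contain [0x1c00, 0x2400).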
   */
  bool contains(const MemoryRange& o) const {
    return start_ <= o.start_ && o.end_ <= end_;
  }
  bool contains(remote_ptr<void> p) const { return start_ <= p && p < end_; }

  bool intersects(const MemoryRange& other) const {
    remote_ptr<void> s = std::max(start_, other.start_);
    remote_ptr<void> e = std::min(end_, other.end_);
    return s < e;
  }

  MemoryRange intersect(const MemoryRange& other) const {
    remote_ptr<void> s = std::max(start_, other.start_);
    remote_ptr<void> e = std::min(end_, other.end_);
    return MemoryRange(s, std::max(s, e));
  }

  remote_ptr<void> start() const { return start_; }
  remote_ptr<void> end() const { return end_; }
  size_t size() const { return end_ - start_; }

  static MemoryRange all() {
    return MemoryRange(remote_ptr<void>(), remote_ptr<void>(UINTPTR_MAX));
  }

  // XXX DO NOT USE
  void update_start(remote_ptr<void> s) const {
    const_cast<MemoryRange*>(this)->start_ = s;
  }

private:
  remote_ptr<void> start_;
  remote_ptr<void> end_;
};

inline std::ostream& operator<<(std::ostream& o, const MemoryRange& m) {
  o << m.start() << "-" << m.end();
  return o;
}

} // namespace rr

#endif /* RR_MEMORY_RANGE_H_ */

rr-5.7.0/src/MmappedFileMonitor.cc
/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "MmappedFileMonitor.h"

#include "RecordSession.h"
#include "RecordTask.h"
#include "ReplayTask.h"
#include "log.h"

using namespace std;

namespace rr {

MmappedFileMonitor::MmappedFileMonitor(Task* t, int fd) {
  ASSERT(t, !t->session().is_replaying());
  dead_ = false;
  auto stat = t->stat_fd(fd);
  device_ = stat.st_dev;
  inode_ = stat.st_ino;
}

MmappedFileMonitor::MmappedFileMonitor(Task* t, EmuFile::shr_ptr f) {
  ASSERT(t, t->session().is_replaying());
  dead_ = false;
  device_ = f->device();
  inode_ = f->inode();
}

void MmappedFileMonitor::did_write(Task* t, const std::vector<Range>& ranges,
                                   LazyOffset& offset) {
  // If there are no remaining mappings that we care about, those can't
  // reappear without going through mmap again, at which point this will be
  // reset to false.
  if (dead_) {
    return;
  }
  if (ranges.empty()) {
    return;
  }
  // Dead until proven otherwise
  dead_ = true;
  int64_t realized_offset = 0;
  bool is_replay = t->session().is_replaying();
  for (auto v : t->session().vms()) {
    for (const auto& m : v->maps()) {
      auto km = m.map;
      if (is_replay) {
        if (!m.emu_file || m.emu_file->device() != device_ ||
            m.emu_file->inode() != inode_) {
          continue;
        }
      } else {
        if (km.device() != device_ || km.inode() != inode_) {
          continue;
        }
        // If the mapping is MAP_PRIVATE then this write is dangerous
        // because it's unpredictable what will be seen in the mapping.
        // However, it could be OK if the application doesn't read from
        // this part of the mapping. Just optimistically assume this mapping
        // is not affected.
        if (!(km.flags() & MAP_SHARED)) {
          LOG(warn) << "MAP_PRIVATE mapping affected by write";
          continue;
        }
      }
      // We're discovering a mapping we care about
      if (dead_) {
        dead_ = false;
        realized_offset = offset.retrieve(true);
      }
      // stat matches.
      uint64_t mapping_offset = km.file_offset_bytes();
      int64_t local_offset = realized_offset;
      for (auto r : ranges) {
        remote_ptr<void> start = km.start() + local_offset - mapping_offset;
        MemoryRange mr(start, r.length);
        if (km.intersects(mr)) {
          if (is_replay) {
            // If we're writing beyond the EmuFile's end, resize it.
            m.emu_file->ensure_size(local_offset + r.length);
          } else {
            ASSERT(t, !v->task_set().empty());
            // We will record multiple writes if the file is mapped multiple
            // times. This is inefficient --- one is sufficient --- but not
            // wrong.
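            // (Illustrative: if the same file is mapped at two different
            // addresses, one write through the fd is recorded once per
            // intersecting mapping, so replay updates both copies.)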
// Make sure we use a task for this address space. `t` might have // a different address space. for (auto tt : v->task_set()) { // If the task here has execed, we may not be able to record its // memory any longer, so loop through all tasks in this address // space in turn in case any *didn't* exec. if (!tt->already_exited() && static_cast(tt)->record_remote_fallible(km.intersect(mr)) > 0) { break; } } } } local_offset += r.length; } } } } } // namespace rr rr-5.7.0/src/MmappedFileMonitor.h000066400000000000000000000022651450675474200166640ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MMAPPED_FILE_MONITOR_H_ #define RR_MMAPPED_FILE_MONITOR_H_ #include "EmuFs.h" #include "FileMonitor.h" #include namespace rr { /** * A FileMonitor to track writes to files that are mmapped in so they can be * replayed. */ class MmappedFileMonitor : public FileMonitor { public: MmappedFileMonitor(Task* t, int fd); MmappedFileMonitor(Task* t, EmuFile::shr_ptr f); virtual Type type() override { return Mmapped; } void revive() { dead_ = false; } // If this write could potentially affect memory we need to PREVENT_SWITCH, // since the timing of the write is otherwise unpredictable from our // perspective. virtual Switchable will_write(Task*) override { return dead_ ? ALLOW_SWITCH : PREVENT_SWITCH; } /** * During recording, note writes to mapped segments. */ virtual void did_write(Task* t, const std::vector& ranges, LazyOffset& offset) override; private: // Whether this monitor is still actively monitoring bool dead_; dev_t device_; ino_t inode_; }; } // namespace rr #endif /* RR_MMAPPED_FILE_MONITOR_H_ */ rr-5.7.0/src/MonitoredSharedMemory.cc000066400000000000000000000053641450675474200175520ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "MonitoredSharedMemory.h" #include #include "AddressSpace.h" #include "AutoRemoteSyscalls.h" #include "RecordTask.h" #include "Session.h" #include "core.h" #include "log.h" using namespace std; namespace rr { MonitoredSharedMemory::~MonitoredSharedMemory() { munmap(real_mem, size); } static const char dconf_suffix[] = "/dconf/user"; void MonitoredSharedMemory::maybe_monitor(RecordTask* t, const string& file_name, const AddressSpace::Mapping& m, int tracee_fd, uint64_t offset) { size_t dconf_suffix_len = sizeof(dconf_suffix) - 1; if (file_name.size() < dconf_suffix_len || file_name.substr(file_name.size() - dconf_suffix_len) != dconf_suffix) { return; } AutoRemoteSyscalls remote(t); ScopedFd fd = remote.retrieve_fd(tracee_fd); if (!fd.is_open()) { // Tracee died return; } uint8_t* real_mem = static_cast( mmap(NULL, m.map.size(), PROT_READ, MAP_SHARED, fd, offset)); ASSERT(t, real_mem != MAP_FAILED); auto result = shared_ptr( new MonitoredSharedMemory(real_mem, m.map.size())); AddressSpace::Mapping shared = Session::steal_mapping(remote, m, std::move(result)); // m may be invalid now if (!shared.local_addr) { // tracee died return; } memcpy(shared.local_addr, real_mem, shared.map.size()); } MonitoredSharedMemory::shr_ptr MonitoredSharedMemory::subrange(uintptr_t, uintptr_t) { DEBUG_ASSERT(false && "Subranges not supported yet!"); return nullptr; } void MonitoredSharedMemory::check_all(RecordTask* t) { vector> addrs; for (auto a : t->vm()->monitored_addrs()) { addrs.push_back(a); } for (auto a : addrs) { auto m = t->vm()->mapping_of(a); if (m.monitored_shared_memory) { m.monitored_shared_memory->check_for_changes(t, m); } } } void 
MonitoredSharedMemory::check_for_changes(RecordTask* t, AddressSpace::Mapping& m) { ASSERT(t, m.map.size() == size); if (!m.local_addr) { // reestablish local mapping after a fork or whatever AutoRemoteSyscalls remote(t); auto msm = m.monitored_shared_memory; m = Session::recreate_shared_mmap(remote, m, Session::DISCARD_CONTENTS, std::move(msm)); if (!m.local_addr) { // Tracee died. return; } } if (!memcmp(m.local_addr, real_mem, size)) { return; } memcpy(m.local_addr, real_mem, size); t->record_local(m.map.start(), size, real_mem); } } rr-5.7.0/src/MonitoredSharedMemory.h000066400000000000000000000034561450675474200174140ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MONITORED_SHARED_MEMORY_H_ #define RR_MONITORED_SHARED_MEMORY_H_ #include #include "AddressSpace.h" namespace rr { class RecordTask; /** * Support tracees that share memory read-only with a non-tracee that * writes to the memory. Currently this just supports limited cases that * suffice for dconf: no remapping, coalescing or splitting of the memory is * allowed (|subrange| below just asserts). It doesn't handle mappings where * the mapping has more pages than the file. * * After such memory is mapped in the tracee, we also map it in rr at |real_mem| * and replace the tracee's mapping with a "shadow buffer" that's only shared * with rr. Then periodically rr reads the real memory, and if it doesn't match * the shadow buffer, we update the shadow buffer with the new values and * record that we did so. * * Currently we check the real memory after each syscall exit. This ensures * that if the tracee is woken up by some IPC mechanism (or after sched_yield), * it will get a chance to see updated memory values. */ class MonitoredSharedMemory { public: ~MonitoredSharedMemory(); typedef std::shared_ptr shr_ptr; static void maybe_monitor(RecordTask* t, const std::string& file_name, const AddressSpace::Mapping& m, int tracee_fd, uint64_t offset); static void check_all(RecordTask* t); shr_ptr subrange(uintptr_t start, uintptr_t size); private: void check_for_changes(RecordTask* t, AddressSpace::Mapping& m); MonitoredSharedMemory(uint8_t* real_mem, size_t size) : real_mem(real_mem), size(size) {} uint8_t* real_mem; size_t size; }; } // namespace rr #endif /* RR_MONITORED_SHARED_MEMORY_H_ */ rr-5.7.0/src/Monkeypatcher.cc000066400000000000000000002031051450675474200160740ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Monkeypatcher.h" #include #include #include #include "AddressSpace.h" #include "AutoRemoteSyscalls.h" #include "ElfReader.h" #include "Flags.h" #include "RecordSession.h" #include "RecordTask.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "core.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" using namespace std; namespace rr { #include "AssemblyTemplates.generated" static void write_and_record_bytes(RecordTask* t, remote_ptr child_addr, size_t size, const void* buf, bool* ok = nullptr) { t->write_bytes_helper(child_addr, size, buf, ok); if (!ok || *ok) { t->record_local(child_addr, size, buf); } } template static void write_and_record_bytes(RecordTask* t, remote_ptr child_addr, const uint8_t (&buf)[N], bool* ok = nullptr) { write_and_record_bytes(t, child_addr, N, buf, ok); } template static void write_and_record_mem(RecordTask* t, remote_ptr child_addr, const T* val, int count) { t->write_bytes_helper(child_addr, sizeof(*val) * count, 
static_cast(val)); t->record_local(child_addr, sizeof(T) * count, val); } /** * RecordSession sets up an LD_PRELOAD environment variable with an entry * SYSCALLBUF_LIB_FILENAME_PADDED (and, if enabled, an LD_AUDIT environment * variable with an entry RTLDAUDIT_LIB_FILENAME_PADDED) which is big enough to * hold either the 32-bit or 64-bit preload/audit library file names. * Immediately after exec we enter this function, which patches the environment * variable value with the correct library name for the task's architecture. * * It's possible for this to fail if a tracee alters the LD_PRELOAD value * and then does an exec. That's just too bad. If we ever have to handle that, * we should modify the environment passed to the exec call. This function * failing isn't necessarily fatal; a tracee might not rely on the functions * overridden by the preload library, or might override them itself (e.g. * because we're recording an rr replay). */ #define setup_library_path(arch, env_var, soname, task) \ setup_library_path_arch(task, env_var, soname ## _BASE, \ soname ## _PADDED, soname ## _32) template static void setup_library_path_arch(RecordTask* t, const char* env_var, const char* soname_base, const char* soname_padded, const char* soname_32) { const char* lib_name = sizeof(typename Arch::unsigned_word) < sizeof(uintptr_t) ? soname_32 : soname_padded; auto env_assignment = string(env_var) + "="; auto p = t->regs().sp().cast(); auto argc = t->read_mem(p); p += 1 + argc + 1; // skip argc, argc parameters, and trailing NULL while (true) { auto envp = t->read_mem(p); if (!envp) { LOG(debug) << env_var << " not found"; return; } string env = t->read_c_str(envp); if (env.find(env_assignment) != 0) { ++p; continue; } size_t lib_pos = env.find(soname_base); if (lib_pos == string::npos) { LOG(debug) << soname_base << " not found in " << env_var; return; } size_t next_colon = env.find(':', lib_pos); if (next_colon != string::npos) { while ((next_colon + 1 < env.length()) && (env[next_colon + 1] == ':' || env[next_colon + 1] == 0)) { ++next_colon; } if (next_colon + 1 < lib_pos + sizeof(soname_padded) - 1) { LOG(debug) << "Insufficient space for " << lib_name << " in " << env_var << " before next ':'"; return; } } if (env.length() < lib_pos + sizeof(soname_padded) - 1) { LOG(debug) << "Insufficient space for " << lib_name << " in " << env_var << " before end of string"; return; } remote_ptr dest = envp + lib_pos; write_and_record_mem(t, dest.cast(), lib_name, strlen(soname_padded)); return; } } template static void setup_preload_library_path(RecordTask* t) { static_assert(sizeof(SYSCALLBUF_LIB_FILENAME_PADDED) == sizeof(SYSCALLBUF_LIB_FILENAME_32), "filename length mismatch"); setup_library_path(Arch, "LD_PRELOAD", SYSCALLBUF_LIB_FILENAME, t); } template static void setup_audit_library_path(RecordTask* t) { static_assert(sizeof(RTLDAUDIT_LIB_FILENAME_PADDED) == sizeof(RTLDAUDIT_LIB_FILENAME_32), "filename length mismatch"); if (t->session().use_audit()) { setup_library_path(Arch, "LD_AUDIT", RTLDAUDIT_LIB_FILENAME, t); } } void Monkeypatcher::init_dynamic_syscall_patching( RecordTask* t, int syscall_patch_hook_count, remote_ptr syscall_patch_hooks) { if (syscall_patch_hook_count && syscall_hooks.empty()) { syscall_hooks = t->read_mem(syscall_patch_hooks, syscall_patch_hook_count); } } template static bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, RecordTask* t, const syscall_patch_hook& hook, remote_code_ptr ip_of_instruction, size_t instruction_length, uint32_t fake_syscall_number); template 
static void substitute(uint8_t* buffer, uint64_t return_addr, uint32_t trampoline_relative_addr); template static void substitute_extended_jump(uint8_t* buffer, uint64_t patch_addr, uint64_t return_addr, uint64_t target_addr, uint32_t fake_syscall_number); template <> void substitute_extended_jump( uint8_t* buffer, uint64_t patch_addr, uint64_t return_addr, uint64_t target_addr, uint32_t) { int64_t offset = target_addr - (patch_addr + X86SyscallStubExtendedJump::trampoline_relative_addr_end); // An offset that appears to be > 2GB is OK here, since EIP will just // wrap around. X86SyscallStubExtendedJump::substitute(buffer, (uint32_t)return_addr, (uint32_t)offset); } template <> void substitute_extended_jump( uint8_t* buffer, uint64_t, uint64_t return_addr, uint64_t target_addr, uint32_t) { X64SyscallStubExtendedJump::substitute(buffer, (uint32_t)return_addr, (uint32_t)(return_addr >> 32), target_addr); } template <> void substitute_extended_jump( uint8_t* buffer, uint64_t patch_addr, uint64_t return_addr, uint64_t target_addr, uint32_t fake_syscall_number) { int64_t offset = target_addr - (patch_addr + X86SyscallStubExtendedJump::trampoline_relative_addr_end); // An offset that appears to be > 2GB is OK here, since EIP will just // wrap around. X86TrapInstructionStubExtendedJump::substitute(buffer, (uint32_t)return_addr, fake_syscall_number, (uint32_t)offset); } template <> void substitute_extended_jump( uint8_t* buffer, uint64_t, uint64_t return_addr, uint64_t target_addr, uint32_t fake_syscall_number) { X64TrapInstructionStubExtendedJump::substitute(buffer, (uint32_t)return_addr, (uint32_t)(return_addr >> 32), fake_syscall_number, target_addr); } /** * Allocate an extended jump in an extended jump page and return its address. * The resulting address must be within 2G of from_end, and the instruction * there must jump to to_start. */ template static remote_ptr allocate_extended_jump_x86ish( RecordTask* t, vector& pages, remote_ptr from_end) { Monkeypatcher::ExtendedJumpPage* page = nullptr; for (auto& p : pages) { remote_ptr page_jump_start = p.addr + p.allocated; int64_t offset = page_jump_start - from_end; if ((int32_t)offset == offset && p.allocated + ExtendedJumpPatch::size <= page_size()) { page = &p; break; } } if (!page) { // We're looking for a gap of three pages --- one page to allocate and // a page on each side as a guard page. uint32_t required_space = 3 * page_size(); remote_ptr free_mem = t->vm()->find_free_memory(t, required_space, // Find free space after the patch site. 
t->vm()->mapping_of(from_end).map.start()); if (!free_mem) { LOG(debug) << "Can't find free memory anywhere after the jump"; return nullptr; } remote_ptr addr = (free_mem + page_size()).cast(); int64_t offset = addr - from_end; if ((int32_t)offset != offset) { LOG(debug) << "Can't find space close enough for the jump"; return nullptr; } { AutoRemoteSyscalls remote(t); int prot = PROT_READ | PROT_EXEC; int flags = MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE; auto ret = remote.infallible_mmap_syscall_if_alive(addr, page_size(), prot, flags, -1, 0); if (!ret) { /* Tracee died */ return nullptr; } KernelMapping recorded(addr, addr + page_size(), string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, prot, flags); t->vm()->map(t, addr, page_size(), prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, nullptr, &recorded); t->vm()->mapping_flags_of(addr) |= AddressSpace::Mapping::IS_PATCH_STUBS; t->trace_writer().write_mapped_region(t, recorded, recorded.fake_stat(), recorded.fsname(), vector(), TraceWriter::PATCH_MAPPING); } pages.push_back(Monkeypatcher::ExtendedJumpPage(addr)); page = &pages.back(); } remote_ptr jump_addr = page->addr + page->allocated; page->allocated += ExtendedJumpPatch::size; return jump_addr; } /** * Encode the standard movz|movk sequence for moving constant `v` into register `reg` */ static void encode_immediate_aarch64(std::vector &buff, uint8_t reg, uint64_t v) { DEBUG_ASSERT(reg < 31); const uint32_t movz_inst = 0xd2800000; const uint32_t movk_inst = 0xf2800000; uint32_t mov_inst = movz_inst; for (int lsl = 3; lsl >= 0; lsl--) { uint32_t bits = (v >> (lsl * 16)) & 0xffff; if (bits == 0 && !(lsl == 0 && mov_inst == movz_inst)) { // Skip zero bits unless it's the only instruction, i.e. v == 0 continue; } // movz|movk x[reg], #bits, LSL #lsl buff.push_back(mov_inst | (uint32_t(lsl) << 21) | (bits << 5) | reg); mov_inst = movk_inst; } } /** * Encode the following assembly. * * cmp x8, 1024 * b.hi .Lnosys * movk x8, preload_thread_locals >> 16, lsl 16 * stp x15, x30, [x8, stub_scratch_2 - preload_thread_locals] * movz x30, #:abs_g3:_syscall_hook_trampoline * movk x30, #:abs_g2_nc:_syscall_hook_trampoline * movk x30, #:abs_g1_nc:_syscall_hook_trampoline * movk x30, #:abs_g0_nc:_syscall_hook_trampoline // Might be shorter depending on the address * blr x30 * ldp x15, x30, [x15] .Lreturn: * b syscall_return_address .Lnosys: * svc 0x0 // the test relies on invalid syscall triggering an event. * // mov x0, -ENOSYS * b .Lreturn * .long * * And return the instruction index of `.Lreturn`. * The branch instruction following that label will not be encoded * since it depends on the address of this code. */ static uint32_t encode_extended_jump_aarch64(std::vector &buff, uint64_t target, uint64_t return_addr, uint32_t *_retaddr_idx = nullptr) { // cmp x8, 1024 buff.push_back(0xf110011f); uint32_t b_hi_idx = buff.size(); buff.push_back(0); // place holder // movk x8, preload_thread_locals >> 16, lsl 16 buff.push_back(0xf2ae0028); // stp x15, x30, [x8, #104] buff.push_back(0xa906f90f); encode_immediate_aarch64(buff, 30, target); // blr x30 buff.push_back(0xd63f03c0); // ldp x15, x30, [x15] buff.push_back(0xa94079ef); uint32_t ret_idx = buff.size(); buff.push_back(0); // place holder // b.hi . + (ret_inst + 4 - .) buff[b_hi_idx] = 0x54000000 | ((ret_idx + 1 - b_hi_idx) << 5) | 0x8; // movn x0, (ENOSYS - 1), i.e. 
mov x0, -ENOSYS // buff.push_back(0x92800000 | ((ENOSYS - 1) << 5) | 0); buff.push_back(0xd4000001); // svc 0 // b .-2 buff.push_back(0x17fffffe); uint32_t retaddr_idx = buff.size(); if (_retaddr_idx) *_retaddr_idx = retaddr_idx; buff.resize(retaddr_idx + 2); memcpy(&buff[retaddr_idx], &return_addr, 8); return ret_idx; } // b and bl has a 26bit signed immediate in unit of 4 bytes constexpr int32_t aarch64_b_max_offset = ((1 << 25) - 1) * 4; constexpr int32_t aarch64_b_min_offset = (1 << 25) * -4; static remote_ptr allocate_extended_jump_aarch64( RecordTask* t, vector& pages, remote_ptr svc_ip, uint64_t to, std::vector &inst_buff) { uint64_t return_addr = svc_ip.as_int() + 4; auto ret_idx = encode_extended_jump_aarch64(inst_buff, to, return_addr); auto total_patch_size = inst_buff.size() * 4; Monkeypatcher::ExtendedJumpPage* page = nullptr; // There are two jumps we need to worry about for the offset // (actually 3 since there's also the jump back after unpatching // but the requirement for that is always more relaxed than the combination // of these two), // the jump to the stub and the jump back. // The jump to the stub has offset `stub - syscall` and the jump back has offset // `syscall + 4 - (stub + ret_idx * 4)` // We need to make sure both are within the offset range so // * aarch64_b_min_offset <= stub - syscall <= aarch64_b_max_offset // * aarch64_b_min_offset <= syscall + 4 - (stub + ret_idx * 4) <= aarch64_b_max_offset // or // * aarch64_b_min_offset <= stub - syscall <= aarch64_b_max_offset // * -aarch64_b_max_offset + 4 - ret_idx * 4 <= stub - syscall <= -aarch64_b_min_offset + 4 - ret_idx * 4 int64_t patch_offset_min = std::max(aarch64_b_min_offset, -aarch64_b_max_offset + 4 - int(ret_idx) * 4); int64_t patch_offset_max = std::min(aarch64_b_max_offset, -aarch64_b_min_offset + 4 - int(ret_idx) * 4); for (auto& p : pages) { remote_ptr page_jump_start = p.addr + p.allocated; int64_t offset = page_jump_start - svc_ip; if (offset <= patch_offset_max && offset >= patch_offset_min && p.allocated + total_patch_size <= page_size()) { page = &p; break; } } if (!page) { // We're looking for a gap of three pages --- one page to allocate and // a page on each side as a guard page. uint32_t required_space = 3 * page_size(); remote_ptr free_mem = t->vm()->find_free_memory(t, required_space, // Find free space after the patch site. 
t->vm()->mapping_of(svc_ip).map.start()); if (!free_mem) { LOG(debug) << "Can't find free memory anywhere after the jump"; return nullptr; } remote_ptr addr = (free_mem + page_size()).cast(); int64_t offset = addr - svc_ip; if (offset > patch_offset_max || offset < patch_offset_min) { LOG(debug) << "Can't find space close enough for the jump"; return nullptr; } { AutoRemoteSyscalls remote(t); int prot = PROT_READ | PROT_EXEC; int flags = MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE; auto ret = remote.infallible_mmap_syscall_if_alive(addr, page_size(), prot, flags, -1, 0); if (!ret) { /* Tracee died */ return nullptr; } KernelMapping recorded(addr, addr + page_size(), string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, prot, flags); t->vm()->map(t, addr, page_size(), prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, nullptr, &recorded); t->vm()->mapping_flags_of(addr) |= AddressSpace::Mapping::IS_PATCH_STUBS; t->trace_writer().write_mapped_region(t, recorded, recorded.fake_stat(), recorded.fsname(), vector(), TraceWriter::PATCH_MAPPING); } pages.push_back(Monkeypatcher::ExtendedJumpPage(addr)); page = &pages.back(); } remote_ptr jump_addr = page->addr + page->allocated; const uint64_t reverse_jump_addr = jump_addr.as_int() + ret_idx * 4; const int64_t reverse_offset = int64_t(return_addr - reverse_jump_addr); const uint32_t offset_imm26 = (reverse_offset >> 2) & 0x03ffffff; inst_buff[ret_idx] = 0x14000000 | offset_imm26; page->allocated += total_patch_size; return jump_addr; } bool Monkeypatcher::is_jump_stub_instruction(remote_code_ptr ip, bool include_safearea) { remote_ptr pp = ip.to_data_ptr(); auto it = syscallbuf_stubs.upper_bound(pp); if (it == syscallbuf_stubs.begin()) { return false; } --it; auto begin = it->first; auto end = begin + it->second.size; if (!include_safearea) { begin += it->second.safe_prefix; end -= it->second.safe_suffix; } return begin <= pp && pp < end; } remote_code_ptr Monkeypatcher::get_jump_stub_exit_breakpoint(remote_code_ptr ip, RecordTask *t) { if (t->arch() != aarch64) { return nullptr; } remote_ptr pp = ip.to_data_ptr(); auto it = syscallbuf_stubs.upper_bound(pp); if (it == syscallbuf_stubs.begin()) { return nullptr; } --it; auto bp = it->first + it->second.size - it->second.safe_suffix; if (pp == bp || pp == bp - 4) { return remote_code_ptr(bp.as_int()); } return nullptr; } static bool hook_can_ignore_interfering_branches(const syscall_patch_hook& hook, size_t jump_patch_size) { return hook.patch_region_length >= jump_patch_size && (hook.flags & (PATCH_IS_MULTIPLE_INSTRUCTIONS | PATCH_IS_NOP_INSTRUCTIONS)) == PATCH_IS_NOP_INSTRUCTIONS; } /** * Some functions make system calls while storing local variables in memory * below the stack pointer. We need to decrement the stack pointer by * some "safety zone" amount to get clear of those variables before we make * a call instruction. So, we allocate a stub per patched callsite, and jump * from the callsite to the stub. The stub decrements the stack pointer, * calls the appropriate syscall hook function, reincrements the stack pointer, * and jumps back to immediately after the patched callsite. * * It's important that gdb stack traces work while a thread is stopped in the * syscallbuf code. To ensure that the above manipulations don't foil gdb's * stack walking code, we add CFI data to all the stubs. To ease that, the * stubs are written in assembly and linked into the preload library. 
* * On x86-64 with ASLR, we need to be able to patch a call to a stub from * sites more than 2^31 bytes away. We only have space for a 5-byte jump * instruction. So, we allocate "extender pages" --- pages of memory within * 2GB of the patch site, that contain the stub code. We don't really need this * on x86, but we do it there too for consistency. * * If fake_syscall_number > 0 then we'll ensure AX is set to that number * by the stub code. */ template static bool patch_syscall_with_hook_x86ish(Monkeypatcher& patcher, RecordTask* t, const syscall_patch_hook& hook, remote_code_ptr ip_of_instruction, size_t instruction_length, uint32_t fake_syscall_number) { size_t patch_region_size = instruction_length + hook.patch_region_length; uint8_t jump_patch[patch_region_size]; // We're patching in a relative jump, so we need to compute the offset from // the end of the jump to our actual destination. remote_ptr jump_patch_start = ip_of_instruction.to_data_ptr(); if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) { jump_patch_start -= hook.patch_region_length; } remote_ptr jump_patch_end = jump_patch_start + JumpPatch::size; remote_ptr return_addr = jump_patch_start + patch_region_size; remote_ptr extended_jump_start; if (fake_syscall_number) { extended_jump_start = allocate_extended_jump_x86ish( t, patcher.extended_jump_pages, jump_patch_end); } else { extended_jump_start = allocate_extended_jump_x86ish( t, patcher.extended_jump_pages, jump_patch_end); } if (extended_jump_start.is_null()) { return false; } if (fake_syscall_number) { uint8_t stub_patch[FakeSyscallExtendedJumpPatch::size]; substitute_extended_jump(stub_patch, extended_jump_start.as_int(), return_addr.as_int(), hook.hook_address, fake_syscall_number); write_and_record_bytes(t, extended_jump_start, stub_patch); patcher.syscallbuf_stubs[extended_jump_start] = { &hook, FakeSyscallExtendedJumpPatch::size }; } else { uint8_t stub_patch[ExtendedJumpPatch::size]; substitute_extended_jump(stub_patch, extended_jump_start.as_int(), return_addr.as_int(), hook.hook_address, 0); write_and_record_bytes(t, extended_jump_start, stub_patch); patcher.syscallbuf_stubs[extended_jump_start] = { &hook, ExtendedJumpPatch::size }; } intptr_t jump_offset = extended_jump_start - jump_patch_end; int32_t jump_offset32 = (int32_t)jump_offset; ASSERT(t, jump_offset32 == jump_offset) << "allocate_extended_jump_x86ish didn't work"; // pad with NOPs to the next instruction static const uint8_t NOP = 0x90; memset(jump_patch, NOP, sizeof(jump_patch)); if (hook_can_ignore_interfering_branches(hook, JumpPatch::size)) { // If the preceding instruction is long enough to contain the entire jump, // and is a nop, replace the original instruction by a jump back to the // start of the patch region. This allows us to ignore (likely spurious, // but nevertheless), interfering branches, because whether we jump to the // instruction or the start of the patch region, the effect is the same. 
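// Concretely (hypothetical 9-byte patch region): the final two bytes become
// `eb f7`, i.e. `jmp rel8` with displacement -9 measured from the end of the
// two-byte jump, which lands back on the first padding nop of the region.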
jump_patch[patch_region_size-2] = 0xeb; // jmp rel jump_patch[patch_region_size-1] = (int8_t)-patch_region_size; } JumpPatch::substitute(jump_patch, jump_offset32); bool ok = true; write_and_record_bytes(t, jump_patch_start, sizeof(jump_patch), jump_patch, &ok); if (!ok) { LOG(warn) << "Couldn't write patch; errno=" << errno; } return ok; } template <> bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, RecordTask* t, const syscall_patch_hook& hook, remote_code_ptr ip_of_instruction, size_t instruction_length, uint32_t fake_syscall_number) { return patch_syscall_with_hook_x86ish(patcher, t, hook, ip_of_instruction, instruction_length, fake_syscall_number); } template <> bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, RecordTask* t, const syscall_patch_hook& hook, remote_code_ptr ip_of_instruction, size_t instruction_length, uint32_t fake_syscall_number) { return patch_syscall_with_hook_x86ish(patcher, t, hook, ip_of_instruction, instruction_length, fake_syscall_number); } template <> bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, RecordTask *t, const syscall_patch_hook &hook, remote_code_ptr, size_t, uint32_t) { Registers r = t->regs(); remote_ptr svc_ip = r.ip().to_data_ptr(); std::vector inst_buff; remote_ptr extended_jump_start = allocate_extended_jump_aarch64( t, patcher.extended_jump_pages, svc_ip, hook.hook_address, inst_buff); if (extended_jump_start.is_null()) { return false; } LOG(debug) << "Allocated stub size " << inst_buff.size() * sizeof(uint32_t) << " bytes at " << extended_jump_start << " for syscall at " << svc_ip; auto total_patch_size = inst_buff.size() * 4; write_and_record_bytes(t, extended_jump_start, total_patch_size, &inst_buff[0]); patcher.syscallbuf_stubs[extended_jump_start] = { &hook, total_patch_size, /** * safe_prefix: * We have not modified any registers yet in the first two instructions. * More importantly, we may bail out and return to user code without * hitting the breakpoint in syscallbuf */ 2 * 4, /** * safe_suffix: * We've returned from syscallbuf and continue execution * won't hit syscallbuf breakpoint * (this also include the 8 bytes that stores the return address) * Note that the 4th last instruction also belongs to the syscallbuf return path * However, since it is still using the scratch memory, * it doesn't belong to the safe area. * The caller needs to have special handling for that instruction. 
*/ 3 * 4 + 8 }; intptr_t jump_offset = extended_jump_start - svc_ip; ASSERT(t, jump_offset <= aarch64_b_max_offset && jump_offset >= aarch64_b_min_offset) << "allocate_extended_jump_aarch64 didn't work"; const uint32_t offset_imm26 = (jump_offset >> 2) & 0x03ffffff; const uint32_t b_inst = 0x14000000 | offset_imm26; bool ok = true; write_and_record_bytes(t, svc_ip, 4, &b_inst, &ok); if (!ok) { LOG(warn) << "Couldn't write patch; errno=" << errno; } return ok; } static bool patch_syscall_with_hook(Monkeypatcher& patcher, RecordTask* t, const syscall_patch_hook& hook, remote_code_ptr ip_of_instruction, size_t instruction_length, uint32_t fake_syscall_number) { RR_ARCH_FUNCTION(patch_syscall_with_hook_arch, t->arch(), patcher, t, hook, ip_of_instruction, instruction_length, fake_syscall_number); } template static bool match_extended_jump_patch(Task* t, uint8_t patch[], uint64_t* return_addr, vector* instruction); template <> bool match_extended_jump_patch( Task*, uint8_t patch[], uint64_t* return_addr, vector* instruction) { uint32_t return_addr_lo, return_addr_hi; uint64_t jmp_target; if (!X64SyscallStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &jmp_target)) { return false; } *instruction = rr::syscall_instruction(x86_64); *return_addr = return_addr_lo | (((uint64_t)return_addr_hi) << 32); return true; } template <> bool match_extended_jump_patch( Task* t, uint8_t patch[], uint64_t* return_addr, vector* instruction) { uint32_t return_addr_lo, return_addr_hi, fake_syscall_no; uint64_t jmp_target; if (!X64TrapInstructionStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &fake_syscall_no, &jmp_target)) { return false; } *return_addr = return_addr_lo | (((uint64_t)return_addr_hi) << 32); if ((int)fake_syscall_no == t->session().syscall_number_for_rrcall_rdtsc()) { instruction->resize(sizeof(rdtsc_insn)); memcpy(instruction->data(), rdtsc_insn, instruction->size()); } else { ASSERT(t, false) << "Unknown fake-syscall number " << fake_syscall_no; } return true; } template <> bool match_extended_jump_patch( Task*, uint8_t patch[], uint64_t* return_addr, vector* instruction) { uint32_t return_addr_32, jmp_target_relative; if (!X86SyscallStubExtendedJump::match(patch, &return_addr_32, &jmp_target_relative)) { return false; } *return_addr = return_addr_32; *instruction = rr::syscall_instruction(x86); return true; } template static void substitute_replacement_patch(uint8_t *buffer, uint64_t patch_addr, uint64_t jmp_target); template <> void substitute_replacement_patch(uint8_t *buffer, uint64_t patch_addr, uint64_t jmp_target) { (void)patch_addr; X64SyscallStubRestore::substitute(buffer, jmp_target); } template <> void substitute_replacement_patch(uint8_t *buffer, uint64_t patch_addr, uint64_t jmp_target) { int64_t offset = jmp_target - (patch_addr + X86SyscallStubRestore::trampoline_relative_addr_end); // An offset that appears to be > 2GB is OK here, since EIP will just // wrap around. 
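  // A small sanity sketch of that wraparound, assuming ordinary
  // two's-complement truncation: adding the truncated displacement to the
  // truncated end-of-instruction address reproduces the intended 32-bit
  // target modulo 2^32.
  DEBUG_ASSERT((uint32_t)(patch_addr +
                          X86SyscallStubRestore::trampoline_relative_addr_end) +
                   (uint32_t)offset ==
               (uint32_t)jmp_target);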
X86SyscallStubRestore::substitute(buffer, (uint32_t)offset); } template static void unpatch_extended_jumps(Monkeypatcher& patcher, Task* t) { static_assert(ExtendedJumpPatch::size < FakeSyscallExtendedJumpPatch::size, "If these were the same size then the logic below wouldn't work"); for (auto patch : patcher.syscallbuf_stubs) { const syscall_patch_hook &hook = *patch.second.hook; uint8_t bytes[FakeSyscallExtendedJumpPatch::size]; t->read_bytes_helper(patch.first, patch.second.size, bytes); uint64_t return_addr = 0; vector syscall; if (patch.second.size == ExtendedJumpPatch::size) { if (!match_extended_jump_patch( t, bytes, &return_addr, &syscall)) { ASSERT(t, false) << "Failed to match extended jump patch at " << patch.first; return; } } else if (patch.second.size == FakeSyscallExtendedJumpPatch::size) { if (!match_extended_jump_patch( t, bytes, &return_addr, &syscall)) { ASSERT(t, false) << "Failed to match trap-instruction extended jump patch at " << patch.first; return; } } else { ASSERT(t, false) << "Unknown patch size " << patch.second.size; } // Replace with // extended_jump: // (unless PATCH_SYSCALL_INSTRUCTION_IS_LAST) // // (if PATCH_SYSCALL_INSTRUCTION_IS_LAST) // jmp *(return_addr) // As long as there are not relative branches or anything, this should // always be correct. size_t new_patch_size = hook.patch_region_length + syscall.size() + ReplacementPatch::size; ASSERT(t, new_patch_size <= sizeof(bytes)); uint8_t* ptr = bytes; if (!(hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST)) { memcpy(ptr, syscall.data(), syscall.size()); ptr += syscall.size(); } memcpy(ptr, hook.patch_region_bytes, hook.patch_region_length); ptr += hook.patch_region_length; if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) { memcpy(ptr, syscall.data(), syscall.size()); ptr += syscall.size(); } substitute_replacement_patch(ptr, patch.first.as_int() + hook.patch_region_length + syscall.size(), return_addr); t->write_bytes_helper(patch.first, new_patch_size, bytes); } } template static void unpatch_syscalls_arch(Monkeypatcher &patcher, Task *t); template <> void unpatch_syscalls_arch(Monkeypatcher &patcher, Task *t) { // There is no 32-bit equivalent to X64TrapInstructionStubExtendedJump. // We just pass the X64TrapInstructionStubExtendedJump; its length // will never match any jump stub for 32-bit. 
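  // A compile-time restatement of that claim (the stub sizes are fixed
  // constants from AssemblyTemplates.generated); if the two sizes could ever
  // be equal, the size-based dispatch in unpatch_extended_jumps would
  // misclassify patches.
  static_assert(X86SyscallStubExtendedJump::size !=
                    X64TrapInstructionStubExtendedJump::size,
                "32-bit stub must not be confusable with the 64-bit trap stub");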
return unpatch_extended_jumps(patcher, t); } template <> void unpatch_syscalls_arch(Monkeypatcher &patcher, Task *t) { return unpatch_extended_jumps(patcher, t); } template <> void unpatch_syscalls_arch(Monkeypatcher &patcher, Task *t) { for (auto patch : patcher.syscallbuf_stubs) { const syscall_patch_hook &hook = *patch.second.hook; std::vector hook_prefix; uint32_t prefix_ninst; encode_extended_jump_aarch64(hook_prefix, hook.hook_address, 0, &prefix_ninst); uint32_t prefix_size = prefix_ninst * 4; DEBUG_ASSERT(prefix_size <= 13 * 4); ASSERT(t, patch.second.size >= prefix_size + 8); uint8_t bytes[15 * 4]; t->read_bytes_helper(patch.first, prefix_size + 8, bytes); // 3rd last instruction is the one jumping back and it won't match if (memcmp(&hook_prefix[0], bytes, prefix_size - 3 * 4) != 0) { ASSERT(t, false) << "Failed to match extended jump patch at " << patch.first; return; } uint64_t return_addr; memcpy(&return_addr, &bytes[prefix_size], 8); uint32_t svc_inst = 0xd4000001; memcpy(bytes, &svc_inst, 4); uint64_t reverse_jump_addr = patch.first.as_int() + 4; int64_t reverse_offset = int64_t(return_addr - reverse_jump_addr); ASSERT(t, reverse_offset <= aarch64_b_max_offset && reverse_offset >= aarch64_b_min_offset) << "Cannot encode b instruction to jump back"; uint32_t offset_imm26 = (reverse_offset >> 2) & 0x03ffffff; uint32_t binst = 0x14000000 | offset_imm26; memcpy(&bytes[4], &binst, 4); t->write_bytes_helper(patch.first, 4 * 2, bytes); } } void Monkeypatcher::unpatch_syscalls_in(Task *t) { RR_ARCH_FUNCTION(unpatch_syscalls_arch, t->arch(), *this, t); } static string bytes_to_string(uint8_t* bytes, size_t size) { stringstream ss; for (size_t i = 0; i < size; ++i) { if (i > 0) { ss << ' '; } ss << HEX(bytes[i]); } return ss.str(); } static bool task_safe_for_syscall_patching(RecordTask* t, remote_code_ptr start, remote_code_ptr end) { if (t->is_stopped()) { remote_code_ptr ip = t->ip(); if (start <= ip && ip < end) { return false; } } for (auto& e : t->pending_events) { if (e.is_syscall_event()) { remote_code_ptr ip = e.Syscall().regs.ip(); if (start <= ip && ip < end) { return false; } } } return true; } static bool safe_for_syscall_patching(remote_code_ptr start, remote_code_ptr end, RecordTask* exclude) { for (auto& p : exclude->session().tasks()) { RecordTask* rt = static_cast(p.second); if (rt != exclude && !task_safe_for_syscall_patching(rt, start, end)) { return false; } } return true; } bool Monkeypatcher::try_patch_vsyscall_caller(RecordTask* t, remote_code_ptr ret_addr) { // Emit FLUSH_SYSCALLBUF if there's one pending. // We want our mmap records to be associated with the next (PATCH_SYSCALL) // event, not a FLUSH_SYSCALLBUF event. 
t->maybe_flush_syscallbuf(); uint8_t bytes[X64VSyscallEntry::size]; remote_ptr patch_start = ret_addr.to_data_ptr() - sizeof(bytes); size_t bytes_count = t->read_bytes_fallible(patch_start, sizeof(bytes), bytes); if (bytes_count < sizeof(bytes)) { return false; } uint32_t target_addr = 0; if (!X64VSyscallEntry::match(bytes, &target_addr)) { return false; } uint64_t target_addr_sext = (uint64_t)(int32_t)target_addr; int syscallno = 0; switch (target_addr_sext) { case 0xffffffffff600000: syscallno = X64Arch::gettimeofday; break; case 0xffffffffff600400: syscallno = X64Arch::time; break; case 0xffffffffff600800: syscallno = X64Arch::getcpu; break; default: return false; } X64VSyscallReplacement::substitute(bytes, syscallno); write_and_record_bytes(t, patch_start, bytes); LOG(debug) << "monkeypatched vsyscall caller at " << patch_start; return true; } static uint64_t jump_patch_size(SupportedArch arch) { switch (arch) { case x86: return X86SysenterVsyscallSyscallHook::size; case x86_64: return X64JumpMonkeypatch::size; case aarch64: return 2*rr::syscall_instruction_length(arch); default: FATAL() << "Unimplemented for this architecture"; return 0; } } const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t, remote_code_ptr ip, bool entering_syscall, size_t instruction_length) { /* we need to inspect this many bytes before the start of the instruction, to find every short jump that might land after it. Conservative. */ static const intptr_t LOOK_BACK = 0x80; /* we need to inspect this many bytes after the start of the instruction, to find every short jump that might land after it into the patch area. Conservative. */ static const intptr_t LOOK_FORWARD = 15 + 15 + 0x80; uint8_t bytes[LOOK_BACK + LOOK_FORWARD]; memset(bytes, 0, sizeof(bytes)); // Split reading the code into separate reads for each page, so that if we can't read // from one page, we still get the data from the other page. 
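  {
    // Worked example of the split below, assuming 4096-byte pages: with a
    // hypothetical code_start of 0x1ff0 and a 0x100-byte buffer, the first
    // fallible read covers 0x10 bytes (up to the page boundary at 0x2000)
    // and the second covers the remaining 0xf0 bytes from the next page.
    const uintptr_t example_start = 0x1ff0;
    const uintptr_t example_first =
        min<uintptr_t>(0x2000 - example_start, (uintptr_t)0x100);
    DEBUG_ASSERT(example_first == 0x10);
  }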
ASSERT(t, sizeof(bytes) < page_size()); remote_ptr code_start = ip.to_data_ptr() - LOOK_BACK; size_t buf_valid_start_offset = 0; size_t buf_valid_end_offset = sizeof(bytes); ssize_t first_page_bytes = min(ceil_page_size(code_start) - code_start, sizeof(bytes)); if (t->read_bytes_fallible(code_start, first_page_bytes, bytes) < first_page_bytes) { buf_valid_start_offset = first_page_bytes; } if (first_page_bytes < (ssize_t)sizeof(bytes)) { if (t->read_bytes_fallible(code_start + first_page_bytes, sizeof(bytes) - first_page_bytes, bytes + first_page_bytes) < (ssize_t)sizeof(bytes) - first_page_bytes) { buf_valid_end_offset = first_page_bytes; } } if (buf_valid_start_offset > LOOK_BACK || buf_valid_end_offset < LOOK_BACK + instruction_length) { ASSERT(t, false) << "Can't read memory containing patchable instruction, why are we trying this?"; } uint8_t* following_bytes = &bytes[LOOK_BACK + instruction_length]; size_t following_bytes_count = buf_valid_end_offset - (LOOK_BACK + instruction_length); size_t preceding_bytes_count = LOOK_BACK - buf_valid_start_offset; for (const auto& hook : syscall_hooks) { bool matches_hook = false; if ((!(hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) && following_bytes_count >= hook.patch_region_length && memcmp(following_bytes, hook.patch_region_bytes, hook.patch_region_length) == 0)) { matches_hook = true; } else if ((hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) && hook.patch_region_length <= preceding_bytes_count && memcmp(bytes + LOOK_BACK - hook.patch_region_length, hook.patch_region_bytes, hook.patch_region_length) == 0) { if (entering_syscall) { // A patch that uses bytes before the syscall can't be done when // entering the syscall, it must be done when exiting. So set a flag on // the Task that tells us to come back later. t->retry_syscall_patching = true; LOG(debug) << "Deferring syscall patching at " << ip << " in " << t << " until syscall exit."; return nullptr; } matches_hook = true; } if (!matches_hook) { continue; } if (!hook_can_ignore_interfering_branches(hook, jump_patch_size(t->arch()))) { // Search for a following short-jump instruction that targets an // instruction // after the syscall. False positives are OK. // glibc-2.23.1-8.fc24.x86_64's __clock_nanosleep needs this. 
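      // The byte test below mirrors this predicate: 0xeb is `jmp rel8` and
      // 0x70..0x7f are the Jcc rel8 conditional branches, so any such byte,
      // followed by its one-byte displacement, may be a two-byte branch
      // landing inside the patch region.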
bool found_potential_interfering_branch = false; for (size_t i = buf_valid_start_offset; i + 2 <= buf_valid_end_offset; ++i) { uint8_t b = bytes[i]; // Check for short conditional or unconditional jump if (b == 0xeb || (b >= 0x70 && b < 0x80)) { int offset_from_instruction_end = (int)i + 2 + (int8_t)bytes[i + 1] - (LOOK_BACK + instruction_length); if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) { if (hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS) { found_potential_interfering_branch = offset_from_instruction_end <= -(ssize_t)instruction_length && offset_from_instruction_end > -(ssize_t)(instruction_length + hook.patch_region_length); } else { found_potential_interfering_branch = offset_from_instruction_end == -(ssize_t)instruction_length; } } else { if (hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS) { found_potential_interfering_branch = offset_from_instruction_end >= 0 && offset_from_instruction_end < hook.patch_region_length; } else { found_potential_interfering_branch = offset_from_instruction_end == 0; } } if (found_potential_interfering_branch) { LOG(debug) << "Found potential interfering branch at " << ip.to_data_ptr() - LOOK_BACK + i; break; } } } if (found_potential_interfering_branch) { continue; } } remote_code_ptr start_range, end_range; if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) { start_range = ip - hook.patch_region_length; // if a thread has its RIP at the end of our range, // it could be immediately after a syscall instruction that // will need to be restarted. Patching out that instruction will // prevent the kernel from restarting it. So, extend our range by // one byte to detect such threads. end_range = ip + instruction_length + 1; } else { start_range = ip; end_range = ip + instruction_length + hook.patch_region_length; } if (!safe_for_syscall_patching(start_range, end_range, t)) { LOG(debug) << "Temporarily declining to patch syscall at " << ip << " because a different task has its ip in the patched range"; return nullptr; } LOG(debug) << "Trying to patch bytes " << bytes_to_string( following_bytes, min(following_bytes_count, sizeof(syscall_patch_hook::patch_region_bytes))); return &hook; } LOG(debug) << "Failed to find a syscall hook for bytes " << bytes_to_string( following_bytes, min(following_bytes_count, sizeof(syscall_patch_hook::patch_region_bytes))); return nullptr; } // Syscalls can be patched either on entry or exit. For most syscall // instruction code patterns we can steal bytes after the syscall instruction // and thus we patch on entry, but some patterns require using bytes from // before the syscall instruction itself and thus can only be patched on exit. // The `entering_syscall` flag tells us whether or not we're at syscall entry. // If we are, and we find a pattern that can only be patched at exit, we'll // set a flag on the RecordTask telling it to try again after syscall exit. bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall, SupportedArch arch) { Registers r = t->regs(); remote_code_ptr ip = r.ip(); ASSERT(t, is_x86ish(arch)) << "Unsupported architecture"; size_t instruction_length = rr::syscall_instruction_length(arch); const syscall_patch_hook* hook_ptr = find_syscall_hook(t, ip - instruction_length, entering_syscall, instruction_length); bool success = false; intptr_t syscallno = r.original_syscallno(); if (hook_ptr) { // Get out of executing the current syscall before we patch it. 
if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { return false; } LOG(debug) << "Patching syscall at " << ip << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid; success = patch_syscall_with_hook(*this, t, *hook_ptr, ip - instruction_length, instruction_length, 0); if (!success && entering_syscall) { // Need to reenter the syscall to undo exit_syscall_and_prepare_restart t->enter_syscall(); } } if (!success) { if (!t->retry_syscall_patching) { LOG(debug) << "Failed to patch syscall at " << ip << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip); } return false; } return true; } bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall) { Registers r = t->regs(); remote_code_ptr ip = r.ip() - 4; uint32_t inst[2] = {0, 0}; size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr() - 4, 8, &inst); if (bytes_count < sizeof(inst) || inst[1] != 0xd4000001) { LOG(debug) << "Declining to patch syscall at " << ip << " for unexpected instruction"; tried_to_patch_syscall_addresses.insert(ip); return false; } // mov x8, 0xdc if (inst[0] == 0xd2801b88) { // Clone may either cause the new and the old process to share stack (vfork) // or replacing the stack (pthread_create) // and requires special handling on the caller. // Our syscall hook cannot do that so this would have to be a raw syscall. // We can handle this at runtime but if we know the call is definitely // a clone we can avoid patching it here. LOG(debug) << "Declining to patch clone syscall at " << ip; tried_to_patch_syscall_addresses.insert(ip); return false; } ASSERT(t, (syscall_hooks.size() == 1 && syscall_hooks[0].patch_region_length == 4 && memcmp(syscall_hooks[0].patch_region_bytes, &inst[1], 4) == 0)) << "Unknown syscall hook"; if (!safe_for_syscall_patching(ip, ip + 4, t)) { LOG(debug) << "Temporarily declining to patch syscall at " << ip << " because a different task has its ip in the patched range"; return false; } // Get out of executing the current syscall before we patch it. if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { return false; } LOG(debug) << "Patching syscall at " << ip << " syscall " << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid; auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip, 4, 0); if (!success && entering_syscall) { // Need to reenter the syscall to undo exit_syscall_and_prepare_restart if (!t->enter_syscall()) { return false; } } if (!success) { LOG(debug) << "Failed to patch syscall at " << ip << " syscall " << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip); return false; } return true; } bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { if (syscall_hooks.empty()) { // Syscall hooks not set up yet. Don't spew warnings, and don't // fill tried_to_patch_syscall_addresses with addresses that we might be // able to patch later. return false; } if (t->emulated_ptracer) { // Syscall patching can confuse ptracers, which may be surprised to see // a syscall instruction at the current IP but then when running // forwards, that the syscall occurs deep in the preload library instead. return false; } if (t->is_in_traced_syscall()) { // Never try to patch the traced-syscall in our preload library! 
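    // (That instruction is the designated path by which the preload library
    // deliberately hands syscalls to rr; patching it out would break syscall
    // buffering itself.)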
return false; } Registers r = t->regs(); remote_code_ptr ip = r.ip(); // We should not get here for untraced syscalls or anything else from the rr page. // These should be normally prevented by our seccomp filter // and in the case of syscalls interrupted by signals, // the check for the syscall restart should prevent us from reaching here. DEBUG_ASSERT(ip.to_data_ptr() < AddressSpace::rr_page_start() || ip.to_data_ptr() >= AddressSpace::rr_page_end()); if (tried_to_patch_syscall_addresses.count(ip) || is_jump_stub_instruction(ip, true)) { return false; } // We could examine the current syscall number and if it's not one that // we support syscall buffering for, refuse to patch the syscall instruction. // This would, on the face of it, reduce overhead since patching the // instruction just means a useless trip through the syscall buffering logic. // However, it actually wouldn't help much since we'd still do a switch // on the syscall number in this function instead, and due to context // switching costs any overhead saved would be insignificant. // Also, implementing that would require keeping a buffered-syscalls // list in sync with the preload code, which is unnecessary complexity. SupportedArch arch; if (!get_syscall_instruction_arch( t, ip.decrement_by_syscall_insn_length(t->arch()), &arch) || arch != t->arch()) { LOG(debug) << "Declining to patch cross-architecture syscall at " << ip; tried_to_patch_syscall_addresses.insert(ip); return false; } // Emit FLUSH_SYSCALLBUF if there's one pending. // We want our mmap records to be associated with the next (PATCH_SYSCALL) // event, not a FLUSH_SYSCALLBUF event. t->maybe_flush_syscallbuf(); if (!t->is_stopped()) { // Tracee was unexpectedly kicked out of a ptrace-stop by SIGKILL or // equivalent. Abort trying to patch. return false; } if (arch == aarch64) { return try_patch_syscall_aarch64(t, entering_syscall); } return try_patch_syscall_x86ish(t, entering_syscall, arch); } bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruction_length, bool before_instruction) { if (syscall_hooks.empty()) { // Syscall hooks not set up yet. Don't spew warnings, and don't // fill tried_to_patch_syscall_addresses with addresses that we might be // able to patch later. return false; } if (t->emulated_ptracer) { // Patching can confuse ptracers. return false; } Registers r = t->regs(); remote_code_ptr ip_of_instruction = r.ip() - (before_instruction ? 0 : instruction_length); if (tried_to_patch_syscall_addresses.count(ip_of_instruction + instruction_length)) { return false; } // Emit FLUSH_SYSCALLBUF if there's one pending. // We want our mmap records to be associated with the next (PATCH_SYSCALL) // event, not a FLUSH_SYSCALLBUF event. t->maybe_flush_syscallbuf(); const syscall_patch_hook* hook_ptr = find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length); bool success = false; if (hook_ptr) { LOG(debug) << "Patching trapping instruction at " << ip_of_instruction << " tid " << t->tid; success = patch_syscall_with_hook(*this, t, *hook_ptr, ip_of_instruction, instruction_length, SYS_rrcall_rdtsc); } if (!success) { if (!t->retry_syscall_patching) { LOG(debug) << "Failed to patch trapping instruction at " << ip_of_instruction << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip_of_instruction + instruction_length); } return false; } return true; } // VDSOs are filled with overhead critical functions related to getting the // time and current CPU. 
We need to ensure that these syscalls get redirected // into actual trap-into-the-kernel syscalls so rr can intercept them. template static void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher); template static void patch_at_preload_init_arch(RecordTask* t, Monkeypatcher& patcher); template <> void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher) { (void)patcher; setup_preload_library_path(t); setup_audit_library_path(t); if (!t->vm()->has_vdso()) { patch_auxv_vdso(t, AT_SYSINFO_EHDR, AT_IGNORE); } else { size_t librrpage_base = RR_PAGE_ADDR - AddressSpace::RRPAGE_RECORD_PAGE_OFFSET*PRELOAD_LIBRARY_PAGE_SIZE; patch_auxv_vdso(t, AT_SYSINFO_EHDR, librrpage_base); patch_auxv_vdso(t, X86Arch::RR_AT_SYSINFO, librrpage_base + AddressSpace::RRVDSO_PAGE_OFFSET*PRELOAD_LIBRARY_PAGE_SIZE); } } // Monkeypatch x86 vsyscall hook only after the preload library // has initialized. The vsyscall hook expects to be able to use the syscallbuf. // Before the preload library has initialized, the regular vsyscall code // will trigger ptrace traps and be handled correctly by rr. template <> void patch_at_preload_init_arch(RecordTask* t, Monkeypatcher& patcher) { auto params = t->read_mem( remote_ptr>(t->regs().arg1())); if (!params.syscallbuf_enabled) { return; } patcher.init_dynamic_syscall_patching(t, params.syscall_patch_hook_count, params.syscall_patch_hooks); } template <> void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher) { setup_preload_library_path(t); setup_audit_library_path(t); for (const auto& m : t->vm()->maps()) { auto& km = m.map; patcher.patch_after_mmap(t, km.start(), km.size(), km.file_offset_bytes(), -1, Monkeypatcher::MMAP_EXEC); } if (!t->vm()->has_vdso()) { patch_auxv_vdso(t, AT_SYSINFO_EHDR, AT_IGNORE); } else { size_t librrpage_base = RR_PAGE_ADDR - AddressSpace::RRPAGE_RECORD_PAGE_OFFSET*PRELOAD_LIBRARY_PAGE_SIZE; patch_auxv_vdso(t, AT_SYSINFO_EHDR, librrpage_base); } } template <> void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher) { setup_preload_library_path(t); setup_audit_library_path(t); for (const auto& m : t->vm()->maps()) { auto& km = m.map; patcher.patch_after_mmap(t, km.start(), km.size(), km.file_offset_bytes(), -1, Monkeypatcher::MMAP_EXEC); } if (!t->vm()->has_vdso()) { patch_auxv_vdso(t, AT_SYSINFO_EHDR, AT_IGNORE); } else { size_t librrpage_base = RR_PAGE_ADDR - AddressSpace::RRPAGE_RECORD_PAGE_OFFSET*PRELOAD_LIBRARY_PAGE_SIZE; patch_auxv_vdso(t, AT_SYSINFO_EHDR, librrpage_base); } } template <> void patch_at_preload_init_arch(RecordTask* t, Monkeypatcher& patcher) { auto params = t->read_mem( remote_ptr>(t->regs().arg1())); if (!params.syscallbuf_enabled) { return; } patcher.init_dynamic_syscall_patching(t, params.syscall_patch_hook_count, params.syscall_patch_hooks); } template <> void patch_at_preload_init_arch(RecordTask* t, Monkeypatcher& patcher) { auto params = t->read_mem( remote_ptr>(t->regs().orig_arg1())); if (!params.syscallbuf_enabled) { return; } patcher.init_dynamic_syscall_patching(t, params.syscall_patch_hook_count, params.syscall_patch_hooks); } void Monkeypatcher::patch_after_exec(RecordTask* t) { ASSERT(t, 1 == t->vm()->task_set().size()) << "Can't have multiple threads immediately after exec!"; RR_ARCH_FUNCTION(patch_after_exec_arch, t->arch(), t, *this); } void Monkeypatcher::patch_at_preload_init(RecordTask* t) { // NB: the tracee can't be interrupted with a signal while // we're processing the rrcall, because it's masked off all // signals. 
RR_ARCH_FUNCTION(patch_at_preload_init_arch, t->arch(), t, *this); } static remote_ptr resolve_address(ElfReader& reader, uintptr_t elf_addr, remote_ptr map_start, size_t map_size, uintptr_t map_offset) { uintptr_t file_offset; if (!reader.addr_to_offset(elf_addr, file_offset)) { LOG(warn) << "ELF address " << HEX(elf_addr) << " not in file"; } if (file_offset < map_offset || file_offset + 32 > map_offset + map_size) { // The value(s) to be set are outside the mapped range. This happens // because code and data can be mapped in separate, partial mmaps in which // case some symbols will be outside the mapped range. return nullptr; } return map_start + uintptr_t(file_offset - map_offset); } static void set_and_record_bytes(RecordTask* t, ElfReader& reader, uintptr_t elf_addr, const void* bytes, size_t size, remote_ptr map_start, size_t map_size, size_t map_offset) { remote_ptr addr = resolve_address(reader, elf_addr, map_start, map_size, map_offset); if (!addr) { return; } bool ok = true; t->write_bytes_helper(addr, size, bytes, &ok); // Writing can fail when the value appears to be in the mapped range, but it // actually is beyond the file length. if (ok) { t->record_local(addr, size, bytes); } } /** * Patch _dl_runtime_resolve_(fxsave,xsave,xsavec) to clear "FDP Data Pointer" * register so that CPU-specific behaviors involving that register don't leak * into stack memory. */ static void patch_dl_runtime_resolve(Monkeypatcher& patcher, RecordTask* t, ElfReader& reader, uintptr_t elf_addr, remote_ptr map_start, size_t map_size, size_t map_offset) { if (t->arch() != x86_64) { return; } remote_ptr addr = resolve_address(reader, elf_addr, map_start, map_size, map_offset); if (!addr) { return; } uint8_t impl[X64DLRuntimeResolve::size + X64EndBr::size]; uint8_t *impl_start = impl; t->read_bytes(addr, impl); if (X64EndBr::match(impl) || X86EndBr::match(impl)) { assert(X64EndBr::size == X86EndBr::size); LOG(debug) << "Starts with endbr, skipping"; addr += X64EndBr::size; impl_start += X64EndBr::size; } if (!X64DLRuntimeResolve::match(impl_start) && !X64DLRuntimeResolve2::match(impl_start)) { LOG(warn) << "_dl_runtime_resolve implementation doesn't look right"; return; } uint8_t call_patch[X64CallMonkeypatch::size]; // We're patching in a relative call, so we need to compute the offset from // the end of the call to our actual destination. 
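  // Hypothetical helper mirroring the arithmetic below: a `call rel32`
  // encodes its displacement relative to the first byte after the call
  // instruction, so the destination must be within +/-2GB of that point.
  auto example_rel32 = [](uint64_t call_start, uint64_t dest) -> int32_t {
    return (int32_t)(dest - (call_start + X64CallMonkeypatch::size));
  };
  (void)example_rel32;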
auto call_patch_start = addr.cast(); auto call_patch_end = call_patch_start + sizeof(call_patch); remote_ptr extended_call_start = allocate_extended_jump_x86ish( t, patcher.extended_jump_pages, call_patch_end); if (extended_call_start.is_null()) { return; } uint8_t stub_patch[X64DLRuntimeResolvePrelude::size]; X64DLRuntimeResolvePrelude::substitute(stub_patch); write_and_record_bytes(t, extended_call_start, stub_patch); intptr_t call_offset = extended_call_start - call_patch_end; int32_t call_offset32 = (int32_t)call_offset; ASSERT(t, call_offset32 == call_offset) << "allocate_extended_jump_x86ish didn't work"; X64CallMonkeypatch::substitute(call_patch, call_offset32); write_and_record_bytes(t, call_patch_start, call_patch); // pad with NOPs to the next instruction static const uint8_t NOP = 0x90; uint8_t nops[X64DLRuntimeResolve::size - sizeof(call_patch)]; memset(nops, NOP, sizeof(nops)); write_and_record_mem(t, call_patch_start + sizeof(call_patch), nops, sizeof(nops)); } static bool file_may_need_instrumentation(const AddressSpace::Mapping& map) { size_t file_part = map.map.fsname().rfind('/'); if (file_part == string::npos) { file_part = 0; } else { ++file_part; } const string& fsname = map.map.fsname(); return fsname.find("libpthread", file_part) != string::npos || fsname.find("ld", file_part) != string::npos; } void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, size_t size, size_t offset_bytes, int child_fd, MmapMode mode) { const auto& map = t->vm()->mapping_of(start); if (file_may_need_instrumentation(map) && (t->arch() == x86 || t->arch() == x86_64)) { ScopedFd open_fd; if (child_fd >= 0) { open_fd = t->open_fd(child_fd, O_RDONLY); ASSERT(t, open_fd.is_open()) << "Failed to open child fd " << child_fd; } else { char buf[100]; sprintf(buf, "/proc/%d/map_files/%llx-%llx", t->tid, (long long)start.as_int(), (long long)start.as_int() + size); // Reading these directly requires CAP_SYS_ADMIN, so open the link target // instead. char link[PATH_MAX]; int ret = readlink(buf, link, sizeof(link) - 1); if (ret < 0) { return; } link[ret] = 0; open_fd = ScopedFd(link, O_RDONLY); if (!open_fd.is_open()) { return; } } ElfFileReader reader(open_fd, t->arch()); // Check for symbols first in the library itself, regardless of whether // there is a debuglink. For example, on Fedora 26, the .symtab and // .strtab sections are stripped from the debuginfo file for // libpthread.so. SymbolTable syms = reader.read_symbols(".symtab", ".strtab"); if (syms.size() == 0) { ScopedFd debug_fd = reader.open_debug_file(map.map.fsname()); if (debug_fd.is_open()) { ElfFileReader debug_reader(debug_fd, t->arch()); syms = debug_reader.read_symbols(".symtab", ".strtab"); } } for (size_t i = 0; i < syms.size(); ++i) { if (syms.is_name(i, "__elision_aconf")) { static const int zero = 0; // Setting __elision_aconf.retry_try_xbegin to zero means that // pthread rwlocks don't try to use elision at all. See ELIDE_LOCK // in glibc's elide.h. set_and_record_bytes(t, reader, syms.addr(i) + 8, &zero, sizeof(zero), start, size, offset_bytes); } if (syms.is_name(i, "elision_init")) { // Make elision_init return without doing anything. This means // the __elision_available and __pthread_force_elision flags will // remain zero, disabling elision for mutexes. See glibc's // elision-conf.c. 
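        // (0xC3 is the one-byte x86 `ret` opcode; overwriting the first byte
        // of elision_init with it turns the function into an immediate
        // return.)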
static const uint8_t ret = 0xC3; set_and_record_bytes(t, reader, syms.addr(i), &ret, sizeof(ret), start, size, offset_bytes); } // The following operations can only be applied once because after the // patch is applied the code no longer matches the expected template. // For replaying a replay to work, we need to only apply these changes // during a real exec, not during the mmap operations performed when rr // replays an exec. if (mode == MMAP_EXEC && (syms.is_name(i, "_dl_runtime_resolve_fxsave") || syms.is_name(i, "_dl_runtime_resolve_xsave") || syms.is_name(i, "_dl_runtime_resolve_xsavec"))) { patch_dl_runtime_resolve(*this, t, reader, syms.addr(i), start, size, offset_bytes); } } } } } // namespace rr rr-5.7.0/src/Monkeypatcher.h000066400000000000000000000136671450675474200157520ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MONKEYPATCHER_H_ #define RR_MONKEYPATCHER_H_ #include #include #include #include "preload/preload_interface.h" #include "remote_code_ptr.h" #include "remote_ptr.h" namespace rr { class RecordTask; class ScopedFd; class Task; /** * A class encapsulating patching state. There is one instance of this * class per tracee address space. Currently this class performs the following * tasks: * * 1) Patch the VDSO's user-space-only implementation of certain system calls * (e.g. gettimeofday) to do a proper kernel system call instead, so rr can * trap and record it (x86-64 only). * * 2) Patch the VDSO __kernel_vsyscall fast-system-call stub to redirect to * our syscall hook in the preload library (x86 only). * * 3) Patch syscall instructions whose following instructions match a known * pattern to call the syscall hook. * * Monkeypatcher only runs during recording, never replay. */ class Monkeypatcher { public: Monkeypatcher() {} Monkeypatcher(const Monkeypatcher&) = default; /** * Apply any necessary patching immediately after exec. * In this hook we patch everything that doesn't depend on the preload * library being loaded. */ void patch_after_exec(RecordTask* t); /** * During librrpreload initialization, apply patches that require the * preload library to be initialized. */ void patch_at_preload_init(RecordTask* t); /** * Try to patch the syscall instruction that |t| just entered. If this * returns false, patching failed and the syscall should be processed * as normal. If this returns true, patching succeeded and the syscall * was aborted; ip() has been reset to the start of the patched syscall, * and execution should resume normally to execute the patched code. * Zero or more mapping operations are also recorded to the trace and must * be replayed. */ bool try_patch_syscall(RecordTask* t, bool entering_syscall = true); bool try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall, SupportedArch arch); bool try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall); /** * Try to patch the trapping instruction that |t| just trapped on. If this * returns false, patching failed and the instruction should be processed * as normal. If this returns true, patching succeeded. * t->ip() is the address of the trapping instruction. * and execution should resume normally to execute the patched code. * Zero or more mapping operations are also recorded to the trace and must * be replayed. */ bool try_patch_trapping_instruction(RecordTask* t, size_t instruction_length, bool before_instruction = true); /** * Replace all extended jumps by syscalls again. 
Note that we do not try to * patch the original locations, since we don't know what the tracee may have * done with them in the meantime, we only patch the extended jump stubs, * which the tracee isn't allowed to touch. */ void unpatch_syscalls_in(Task *t); /** * Try to patch the vsyscall-entry pattern occurring right before ret_addr * to instead point into the corresponding entry points in the vdso. * Returns true if the patching succeeded, false if it doesn't. The tasks * registers are left unmodified. */ bool try_patch_vsyscall_caller(RecordTask *t, remote_code_ptr ret_addr); void init_dynamic_syscall_patching( RecordTask* t, int syscall_patch_hook_count, remote_ptr syscall_patch_hooks); /** * Try to allocate a stub from the sycall patching stub buffer. Returns null * if there's no buffer or we've run out of free stubs. */ remote_ptr allocate_stub(RecordTask* t, size_t bytes); enum MmapMode { MMAP_EXEC, MMAP_SYSCALL, }; /** * Apply any necessary patching immediately after an mmap. We use this to * patch libpthread.so. */ void patch_after_mmap(RecordTask* t, remote_ptr start, size_t size, size_t offset_bytes, int child_fd, MmapMode mode); /** * The list of pages we've allocated to hold our extended jumps. */ struct ExtendedJumpPage { ExtendedJumpPage(remote_ptr addr) : addr(addr), allocated(0) {} remote_ptr addr; size_t allocated; }; std::vector extended_jump_pages; bool is_jump_stub_instruction(remote_code_ptr p, bool include_safearea); // Return the breakpoint instruction (i.e. the last branch back to caller) // if we are on the exit path in the jump stub remote_code_ptr get_jump_stub_exit_breakpoint(remote_code_ptr ip, RecordTask *t); struct patched_syscall { // Pointer to hook inside the syscall_hooks array, which gets initialized // once and is fixed afterwards. const syscall_patch_hook *hook; size_t size; uint16_t safe_prefix = 0; uint16_t safe_suffix = 0; }; /** * Addresses/lengths of syscallbuf stubs. */ std::map, patched_syscall> syscallbuf_stubs; private: /** * `ip` is the address of the instruction that triggered the syscall or trap */ const syscall_patch_hook* find_syscall_hook(RecordTask* t, remote_code_ptr ip, bool entering_syscall, size_t instruction_length); /** * The list of supported syscall patches obtained from the preload * library. Each one matches a specific byte signature for the instruction(s) * after a syscall instruction. */ std::vector syscall_hooks; /** * The addresses of the instructions following syscalls or other * instructions that we've tried (or are currently trying) to patch. 
*/ std::unordered_set tried_to_patch_syscall_addresses; }; } // namespace rr #endif /* RR_MONKEYPATCHER_H_ */ rr-5.7.0/src/MvCommand.cc000066400000000000000000000055711450675474200151530ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include #include #include "Command.h" #include "TraceStream.h" #include "main.h" #include "util.h" using namespace std; namespace rr { class MvCommand : public Command { public: virtual int run(vector& args); protected: MvCommand(const char* name, const char* help) : Command(name, help) {} static MvCommand singleton; }; MvCommand MvCommand::singleton("mv", " rr mv \n"); static int mv(const string& from, const string& to, FILE* out) { string reason; if (!is_valid_trace_name(from, &reason) || !is_valid_trace_name(to, &reason)) { fprintf(stderr, "\n" "rr: Trace name is invalid: %s\n" "\n", reason.c_str()); return 1; } string from_path = resolve_trace_name(from); if (!is_trace(from_path)) { fprintf(stderr, "\n" "rr: Could not access / identify '%s' as a trace (errno %d).\n" "\n", from_path.c_str(), errno); return 1; } // resolve symlinks like latest_trace from_path = real_path(from_path); string to_path = to; // if 'to' is not a path, view it as trace name and move to trace_dir/to if (to.find("/") == string::npos) { to_path = trace_save_dir() + "/" + to; } string to_fname = filename(to_path.c_str()); if (to_fname == "latest-trace") { fprintf(stderr, "\nrr: Cannot rename to latest-trace.\n\n"); return 1; } if (access(to_path.c_str(), F_OK) == 0) { fprintf(stderr, "\n" "rr: New trace '%s' already exists or cannot be accessed.\n" "\n", to_path.c_str()); return 1; } else if (errno != ENOENT) { fprintf(stderr, "\n" "rr: Cannot access new trace path '%s': errno %d\n" "\n", to_path.c_str(), errno); return 1; } // remove symlink before removing trace in case the former fails // a bad symlink crashes e.g. rr ls and midas if (is_latest_trace(from_path)) { if (!remove_latest_trace_symlink()) { return 1; } } int ret = rename(from_path.c_str(), to_path.c_str()); if (ret != 0) { const string err = strerror(errno); fprintf(stderr, "\n" "rr: Cannot move '%s' to '%s': %s\n" "\n", from_path.c_str(), to_path.c_str(), err.c_str()); return 1; } fprintf(out, "rr: Moved '%s' to '%s'\n", from_path.c_str(), to_path.c_str()); return 0; } int MvCommand::run(vector& args) { if (args.size() == 2 && verify_not_option(args)) { string from = args[0]; args.erase(args.begin()); if (verify_not_option(args)) { string to = args[0]; return mv(from, to, stdout); } } print_help(stderr); return 1; }; } // namespace rr rr-5.7.0/src/NonvirtualPerfCounterMonitor.h000066400000000000000000000011031450675474200210050ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_NONVIRTUAL_PERF_COUNTER_MONITOR_H_ #define RR_NONVIRTUAL_PERF_COUNTER_MONITOR_H_ #include "FileMonitor.h" namespace rr { /** * A FileMonitor attached to (nonvirtualized) perf counter fds. This just stops those fds from * being mmaped. 
*/ class NonvirtualPerfCounterMonitor : public FileMonitor { public: NonvirtualPerfCounterMonitor() {} virtual Type type() override { return NonvirtualPerfCounter; } }; } // namespace rr #endif /* RR_NONVIRTUAL_PERF_COUNTER_MONITOR_H_ */ rr-5.7.0/src/ODirectFileMonitor.h000066400000000000000000000011101450675474200166160ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_ODIRECT_MONITOR_H_ #define RR_ODIRECT_MONITOR_H_ #include "FileMonitor.h" #include "TraceStream.h" namespace rr { /** * ODirectFileMonitor gets installed upon any use of O_DIRECT. * This monitor can get replaced by an MMappedFileMonitor if the * file is later MAP_SHARED. */ class ODirectFileMonitor : public FileMonitor { public: ODirectFileMonitor() : FileMonitor() {}; virtual Type type() override { return ODirect; } }; } // namespace rr #endif /* RR_ODIRECT_MONITOR_H_ */ rr-5.7.0/src/PackCommand.cc000066400000000000000000000552541450675474200154520ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "Command.h" #include "Flags.h" #include "GdbServer.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "TraceStream.h" #include "kernel_metadata.h" #include "log.h" #include "main.h" #include "../third-party/blake2/blake2.h" using namespace std; namespace rr { /** * Pack the trace directory to eliminate duplicate files and to include all * files needed for transportation. * * We try VERY HARD to avoid losing data if a PackCommand is interrupted. */ class PackCommand : public Command { public: virtual int run(vector& args) override; protected: PackCommand(const char* name, const char* help) : Command(name, help) {} static PackCommand singleton; }; PackCommand PackCommand::singleton( "pack", " rr pack [OPTION]... []\n" " --symlink Create symlinks to all mmapped files\n" " instead of copying them.\n" "\n" "Eliminates duplicate files in the trace directory, and copies files into\n" "the trace directory as necessary to ensure that all needed files are in\n" "the trace directory and none of them are links to files outside the\n" "trace directory. This makes the trace directory independent of changes\n" "to other files and ready to be transported elsewhere (e.g. by packaging\n" "it into a ZIP or tar archive).\n" "Be careful sharing traces with others; they may contain sensitive information.\n"); struct PackFlags { /* If true, insert symlinks into the trace dir which point to the original * files, rather than copying the files themselves */ bool symlink; PackFlags() : symlink(false) {} }; struct FileHash { uint8_t bytes[32]; }; bool operator<(const FileHash& h1, const FileHash& h2) { return memcmp(h1.bytes, h2.bytes, sizeof(h1)) < 0; } // Allocate a fresh FileHash different from every other // FileHash. Not thread-safe! 
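//
// For example (hypothetical case), a mapped file whose size matches no other
// mapped file cannot be a content-duplicate of anything, so pack can skip
// reading it and just fabricate a hash for it, as gather_file_info() does:
//
//   result[file->file_name] = allocate_unique_file_hash();  // no file I/O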
static FileHash allocate_unique_file_hash() { static uint32_t hash = 0; FileHash result; memcpy(&result.bytes[0], &hash, sizeof(hash)); ++hash; memset(&result.bytes[4], 0, sizeof(result.bytes) - sizeof(hash)); return result; } struct FsExtentsHash { uint8_t bytes[32]; }; bool operator<(const FsExtentsHash& h1, const FsExtentsHash& h2) { return memcmp(h1.bytes, h2.bytes, sizeof(h1)) < 0; } static bool name_comparator(const TraceReader::MappedData& d1, const TraceReader::MappedData d2) { return d1.file_name < d2.file_name; } static bool names_equal(const TraceReader::MappedData& d1, const TraceReader::MappedData d2) { return d1.file_name == d2.file_name; } static bool decreasing_size_comparator(const TraceReader::MappedData* d1, const TraceReader::MappedData* d2) { return d1->file_size_bytes > d2->file_size_bytes; } static bool is_hardlink(const string& file_name) { const char* name = file_name.c_str(); const char* right_slash = strrchr(name, '/'); return right_slash && strncmp(right_slash + 1, "mmap_hardlink_", 14) == 0; } static void* process_files_thread(void* p) { // Don't use log.h macros here since they're not necessarily thread-safe auto data = static_cast>*>(p); for (auto& pair : *data) { const char* name = pair.first->c_str(); ScopedFd fd(name, O_RDONLY); if (!fd.is_open()) { fprintf(stderr, "Failed to open %s\n", name); exit(1); } blake2b_state b2_state; if (blake2b_init(&b2_state, sizeof(pair.second.bytes))) { fprintf(stderr, "blake2b_init failed"); exit(1); } while (true) { char buf[1024 * 1024]; ssize_t r = read(fd, buf, sizeof(buf)); if (r < 0) { fprintf(stderr, "Failed reading from %s\n", name); exit(1); } if (r == 0) { break; } if (blake2b_update(&b2_state, buf, r)) { fprintf(stderr, "blake2b_update failed"); exit(1); } } if (blake2b_final(&b2_state, pair.second.bytes, sizeof(pair.second.bytes))) { fprintf(stderr, "blake2b_final failed"); exit(1); } } return nullptr; } // Return a size-sorted list of all mmapped files found in the trace static vector gather_files(const string& trace_dir) { TraceReader trace(trace_dir); vector files; while (true) { TraceReader::MappedData data; bool found; trace.read_mapped_region(&data, &found, TraceReader::VALIDATE, TraceReader::ANY_TIME); if (!found) { break; } if (data.source == TraceReader::SOURCE_FILE) { files.push_back(data); } } // Eliminate duplicates stable_sort(files.begin(), files.end(), name_comparator); auto last = unique(files.begin(), files.end(), names_equal); files.erase(last, files.end()); return files; } // Returns true if FS_IOC_FIEMAP was supported and no extents are // UNKNOWN, storing a BLAKE2b hash of the extents metadata, file // size and filesystem ID in `result`. Otherwise returns false and // `result` is not initialized. `size` is always initialized. // If two files have the same FsExtentsHash then they have the same extents // and therefore the same contents. // If FS_IOC_FIEMAP is supported and the extents are known then this // deduplicates reflinked, hardlinked and symlinked files. 
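//
// A sketch of the intended property, using the helper defined below: two
// paths created with `cp --reflink=always a b` share physical extents, so
// their FsExtentsHash values compare equal and only one needs content hashing:
//
//   FsExtentsHash ha, hb;
//   uint64_t sa, sb;
//   if (get_file_extents_hash("a", &ha, &sa) &&
//       get_file_extents_hash("b", &hb, &sb)) {
//     // memcmp(ha.bytes, hb.bytes, sizeof(ha.bytes)) == 0 for the reflink
//   }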
static bool get_file_extents_hash(const string& file_name, FsExtentsHash* result, uint64_t* size) { const char* name = file_name.c_str(); ScopedFd fd(name, O_RDONLY); if (!fd.is_open()) { fprintf(stderr, "Failed to open %s\n", name); exit(1); } off_t seek_end = lseek(fd, 0, SEEK_END); if (seek_end < 0) { fprintf(stderr, "Failed to SEEK_END %s\n", name); exit(1); } *size = seek_end; blake2b_state b2_state; if (blake2b_init(&b2_state, sizeof(result->bytes))) { fprintf(stderr, "blake2b_init failed\n"); exit(1); } uint64_t offset = 0; bool saw_last = false; do { union { struct fiemap request; char bytes[16384]; } buffer; memset(&buffer.request, 0, sizeof(buffer.request)); buffer.request.fm_start = offset; buffer.request.fm_length = FIEMAP_MAX_OFFSET; buffer.request.fm_extent_count = ((char*)&buffer.bytes[sizeof(buffer.bytes)] - (char*)&buffer.request.fm_extents[0])/sizeof(buffer.request.fm_extents[0]); int ret = ioctl(fd, FS_IOC_FIEMAP, &buffer.request); if (ret < 0) { if (errno == ENOTTY || errno == EOPNOTSUPP) { return false; } fprintf(stderr, "FIEMAP ioctl failed\n"); exit(1); } if (!buffer.request.fm_mapped_extents) { break; } for (size_t i = 0; i < buffer.request.fm_mapped_extents; ++i) { const struct fiemap_extent& extent = buffer.request.fm_extents[i]; // Be super paranoid here. In btrfs at least, we see file extents where // fe_physical is 0 and FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_NOT_ALIGNED // are set; these are not real extents and the file contents are different // even though the extent records are the same. if ((extent.fe_flags & (FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_NOT_ALIGNED)) || !extent.fe_physical) { return false; } // It's not clear if file holes appear in the extents list or not. // To be on the safe side, we hash the logical offsets so any holes // will change the hash. struct { uint64_t fe_logical; uint64_t fe_physical; uint64_t fe_length; uint32_t fe_flags; uint32_t padding; } buf = { extent.fe_logical, extent.fe_physical, extent.fe_length, extent.fe_flags, 0 }; if (blake2b_update(&b2_state, &buf, sizeof(buf))) { fprintf(stderr, "blake2b_update failed\n"); exit(1); } if (extent.fe_flags & FIEMAP_EXTENT_LAST) { saw_last = true; break; } offset = extent.fe_logical + extent.fe_length; } } while (!saw_last); struct statvfs vfs_buf; int ret = fstatvfs(fd, &vfs_buf); if (ret < 0) { fprintf(stderr, "fstatvfs failed\n"); exit(1); } struct { uint64_t size; uint64_t fsid; } buf = { *size, vfs_buf.f_fsid }; // Make sure the file size is hashed just in case it doesn't // show up in the extents. We also need to hash the filesystem // ID because the physical extents are local to the filesystem. if (blake2b_update(&b2_state, &buf, sizeof(buf))) { fprintf(stderr, "blake2b_update failed\n"); exit(1); } if (blake2b_final(&b2_state, result->bytes, sizeof(result->bytes))) { fprintf(stderr, "blake2b_final failed\n"); exit(1); } return true; } // Makes a list of all mmapped files and computes their BLAKE2b hashes. // BLAKE2b was chosen because it's fast and cryptographically strong (we don't // compare the actual file contents, we're relying on hash collision avoidance). // Files with the same FileHash have the same contents. // The keys of the returned map are the full file names of the mapped files. static map gather_file_info(const string& trace_dir) { vector files = gather_files(trace_dir); int use_cpus = min(20, get_num_cpus()); use_cpus = min((int)files.size(), use_cpus); // List of files indexed by their extents hash. 
All files // with the same FsExtentsHash have the same contents. map> extents_to_file; // All files for which we failed to get extents. We know nothing // about their contents. vector files_with_no_extents; for (const auto& file : files) { FsExtentsHash extents_hash; uint64_t size; if (get_file_extents_hash(file.file_name, &extents_hash, &size)) { extents_to_file[extents_hash].push_back(&file); } else { files_with_no_extents.push_back(&file); } if (size != file.file_size_bytes) { fprintf(stderr, "File size mismatch for %s\n", file.file_name.c_str()); exit(1); } } // Make a list of files with possibly unique contents (i.e. excluding // duplicates with the same FsExtentsHash). vector files_to_hash = files_with_no_extents; for (const auto& entry : extents_to_file) { files_to_hash.push_back(entry.second[0]); } // We'll assign files to threads in round-robin order, ordered by decreasing size. stable_sort(files_to_hash.begin(), files_to_hash.end(), decreasing_size_comparator); map file_size_to_file_count; for (auto file : files_to_hash) { ++file_size_to_file_count[file->file_size_bytes]; } map result; vector>> thread_files; thread_files.resize(use_cpus); int num_files_to_hash = 0; for (auto file : files_to_hash) { if (file_size_to_file_count[file->file_size_bytes] == 1) { // There is only one file with this size, so it can't be a duplicate // of any other files in `files_to_hash` and there is no need to hash // its contents. We'll just make up a fake, unique hash value for it. result[file->file_name] = allocate_unique_file_hash(); continue; } thread_files[num_files_to_hash % use_cpus].push_back( make_pair(&file->file_name, FileHash())); ++num_files_to_hash; } // Use multiple threads to actually hash the files we need to hash. vector threads; for (size_t i = 0; i < thread_files.size(); ++i) { pthread_t thread; pthread_create(&thread, nullptr, process_files_thread, &thread_files[i]); threads.push_back(thread); } for (pthread_t t : threads) { pthread_join(t, nullptr); } for (auto& f : thread_files) { for (auto& ff : f) { result[*ff.first] = ff.second; } } // Populate results for files we skipped because they had duplicate // FsExtentsHashes. for (const auto& entry : extents_to_file) { for (size_t i = 1; i < entry.second.size(); ++i) { // Taking a reference into `result` while we potentially // rehash it could be bad. FileHash h = result[entry.second[0]->file_name]; result[entry.second[i]->file_name] = h; } } return result; } static bool is_in_trace_dir(const string& file_name, const string& trace_dir) { return file_name.find(trace_dir) == 0; } static const char* last_filename_component(const string& file_name) { const char* last_slash = strrchr(file_name.c_str(), '/'); const char* last_component = last_slash ? last_slash + 1 : file_name.c_str(); if (strncmp(last_component, "mmap_hardlink_", 14) == 0) { last_component += 14; while (*last_component && *last_component != '_') { ++last_component; } if (*last_component == '_') { ++last_component; } } return last_component; } static string copy_into_trace(const string& file_name, const string& trace_dir, int* name_index) { // We don't bother trying to do a reflink-copy here because if that was going // to succeed, rr would probably already have used it during recording. 
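  // The destination name encodes a uniquifying index plus the original
  // basename; e.g. (hypothetical) /usr/lib/libc.so.6 copied at index 3
  // becomes "<trace_dir>/mmap_pack_3_libc.so.6". The O_EXCL/EEXIST retry
  // loop below bumps the index until an unused name is found.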
string new_name; ScopedFd out_fd; const char* last_component = last_filename_component(file_name); while (true) { char new_name_buf[PATH_MAX]; snprintf(new_name_buf, sizeof(new_name_buf) - 1, "mmap_pack_%d_%s", *name_index, last_component); new_name_buf[sizeof(new_name_buf) - 1] = 0; new_name = trace_dir + "/" + new_name_buf; ++*name_index; out_fd = ScopedFd(new_name.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0700); if (!out_fd.is_open()) { if (errno == EEXIST) { continue; } FATAL() << "Couldn't create " << new_name; } break; } ScopedFd in_fd(file_name.c_str(), O_RDONLY); if (!in_fd.is_open()) { FATAL() << "Couldn't open " << file_name; } while (true) { char buf[1024 * 1024]; ssize_t r = read(in_fd, buf, sizeof(buf)); if (r < 0) { FATAL() << "Can't read from " << file_name; } if (r == 0) { break; } ssize_t written = 0; while (written < r) { ssize_t w = write(out_fd, buf + written, r - written); if (w <= 0) { FATAL() << "Can't write to " << new_name; } written += w; } } // Try to avoid dataloss if (fsync(out_fd) < 0) { FATAL() << "Can't write to " << new_name; } return new_name; } // Generates a symlink inside the trace directory, pointing to the provided // file name. static string symlink_into_trace(const string& file_name, const string& trace_dir, int* name_index) { string new_name; ScopedFd out_fd; const char* last_component = last_filename_component(file_name); while (true) { char new_name_buf[PATH_MAX]; snprintf(new_name_buf, sizeof(new_name_buf) - 1, "mmap_symlink_%d_%s", *name_index, last_component); new_name_buf[sizeof(new_name_buf) - 1] = 0; new_name = trace_dir + "/" + new_name_buf; ++*name_index; int ret = symlink(file_name.c_str(), new_name.c_str()); if (ret < 0) { if (errno == EEXIST) { continue; } FATAL() << "Couldn't create symlink `" << new_name << "' to `" << file_name << "'."; } break; } return new_name; } // Insert symlinks into the trace directory, one for each mmapped file found in // the trace. Returns a mapping of absolute original file paths and the new // relative paths to the symlinks which are to be used in their place. Files // that already exist in the trace directory (including hardlinks) are left // in place and not symlinked. static map compute_canonical_symlink_map( const string& trace_dir) { map symlink_map; int name_index = 0; // Get all mmapped files from trace vector files = gather_files(trace_dir); for (auto& p : files) { string name = p.file_name; // If file is not in trace dir, create a symlink to it if (!is_in_trace_dir(p.file_name, trace_dir)) { name = symlink_into_trace(p.file_name, trace_dir, &name_index); } // Update the file map with the relative path of the target file symlink_map[p.file_name] = string(strrchr(name.c_str(), '/') + 1); } return symlink_map; } /** * This computes a map giving, for each file referenced in the trace, the name * of a trace file to use instead. This copies files into the * trace directory if they're not in the tracedir already, or if they're * hardlinks to files outside the trace directory. All of the copied files * will have names starting with "mmap_pack_". For files in the trace directory * that have the same hash, we select just one of the files as the destination * for all files with that hash. 
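 *
 * A worked example (hypothetical names): if /usr/lib/libfoo.so and
 * <trace>/mmap_hardlink_4_libfoo.so hash to the same FileHash, both keys
 * end up mapped to a single in-trace copy:
 *
 *   "/usr/lib/libfoo.so"                 -> "mmap_pack_0_libfoo.so"
 *   "<trace>/mmap_hardlink_4_libfoo.so"  -> "mmap_pack_0_libfoo.so"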
*/ static map compute_canonical_mmapped_files( const string& trace_dir) { map file_info = gather_file_info(trace_dir); map hash_to_name; for (auto& p : file_info) { const auto& existing = hash_to_name.find(p.second); if (existing != hash_to_name.end()) { if (!is_hardlink(existing->second) && is_in_trace_dir(existing->second, trace_dir)) { continue; } } hash_to_name[p.second] = p.first; } int name_index = 0; for (auto& p : hash_to_name) { // Copy hardlinked files into the trace to avoid the possibility of someone // overwriting the original file. if (is_hardlink(p.second) || !is_in_trace_dir(p.second, trace_dir)) { p.second = copy_into_trace(p.second, trace_dir, &name_index); } } map file_map; for (auto& p : file_info) { string name = hash_to_name[p.second]; if (!is_in_trace_dir(name, trace_dir)) { FATAL() << "Internal error; file is not in trace dir"; } // Replace absolute paths with trace-relative file names file_map[p.first] = string(strrchr(name.c_str(), '/') + 1); } return file_map; } // Write out a new 'mmaps' file with the new file names and atomically // replace the existing 'mmaps' file with it. static void rewrite_mmaps(const map& file_map, const string& trace_dir) { string path = trace_dir + "/pack_mmaps"; CompressedWriter writer(path, TraceStream::mmaps_block_size(), 1); TraceReader trace(trace_dir); vector files; while (true) { TraceReader::MappedData data; bool found; vector extra_fds; bool skip_monitoring_mapped_fd; KernelMapping km = trace.read_mapped_region( &data, &found, TraceReader::VALIDATE, TraceReader::ANY_TIME, &extra_fds, &skip_monitoring_mapped_fd); if (!found) { break; } if (data.source == TraceReader::SOURCE_FILE) { auto m = file_map.find(data.file_name); if (m == file_map.end()) { FATAL() << "Internal error, didn't assign file " << data.file_name; } data.file_name = m->second; } TraceWriter::write_mapped_region_to_alternative_stream( writer, data, km, extra_fds, skip_monitoring_mapped_fd); } // Try not to lose data! writer.close(CompressedWriter::SYNC); if (!writer.good()) { FATAL() << "Error writing " << path; } // OK, now the atomic switchover to the new maps file. // BEFORE this point, we haven't altered any of the original trace files. // A crash might leave some "mmap_pack_" files around but that's OK. A retried // "rr pack" that runs to completion will clean them all up. // AFTER this point, we have altered the mmaps file and the trace remains // valid. string mmaps_path = trace_dir + "/mmaps"; if (rename(path.c_str(), mmaps_path.c_str()) < 0) { FATAL() << "Error renaming " << path << " to " << mmaps_path; } } // Delete any "mmap_" files that aren't destination files in our file_map. 
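//
// For example (hypothetical directory state after rewrite_mmaps): if the
// file_map values are {"mmap_pack_0_libc.so.6"}, then a leftover
// "mmap_hardlink_2_libc.so.6" gets unlinked, while "mmap_pack_0_libc.so.6"
// and files not starting with "mmap_" (events, mmaps, version, ...) stay.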
static void delete_unnecessary_files(const map& file_map, const string& trace_dir) { set retain; for (auto& p : file_map) { retain.insert(p.second); } DIR* dir = opendir(trace_dir.c_str()); if (!dir) { FATAL() << "Can't open directory " << trace_dir; } struct dirent* d; errno = 0; vector names; while ((d = readdir(dir)) != nullptr) { if (strncmp(d->d_name, "mmap_", 5) == 0 && retain.count(string(d->d_name)) == 0) { names.push_back(string(d->d_name)); } } if (errno) { FATAL() << "Can't read directory " << trace_dir; } closedir(dir); for (auto& n : names) { string name = trace_dir + "/" + n; if (unlink(name.c_str()) < 0) { FATAL() << "Can't delete file " << name; } } } static int pack(const string& trace_dir, const PackFlags& flags) { string dir; { // validate trace and produce default trace directory if trace_dir is empty TraceReader reader(trace_dir); dir = reader.dir(); } char buf[PATH_MAX]; char* ret = realpath(dir.c_str(), buf); if (!ret) { FATAL() << "realpath failed on " << dir; } string abspath(buf); if (flags.symlink) { map canonical_symlink_map = compute_canonical_symlink_map(abspath); rewrite_mmaps(canonical_symlink_map, abspath); delete_unnecessary_files(canonical_symlink_map, abspath); } else { map canonical_mmapped_files = compute_canonical_mmapped_files(abspath); rewrite_mmaps(canonical_mmapped_files, abspath); delete_unnecessary_files(canonical_mmapped_files, abspath); } if (!probably_not_interactive(STDOUT_FILENO)) { printf("rr: Packed trace directory `%s'.\n", dir.c_str()); } return 0; } static bool parse_pack_arg(vector& args, PackFlags& flags) { static const OptionSpec options[] = { { 0, "symlink", NO_PARAMETER }, }; ParsedOption opt; auto args_copy = args; if (!Command::parse_option(args_copy, options, &opt)) { return false; } switch (opt.short_name) { case 0: flags.symlink = true; break; default: DEBUG_ASSERT(0 && "Unknown pack option"); } args = args_copy; return true; } int PackCommand::run(vector& args) { bool found_dir = false; string trace_dir; PackFlags flags; while (parse_pack_arg(args, flags)) { } while (!args.empty()) { if (!found_dir && parse_optional_trace_dir(args, &trace_dir)) { found_dir = true; continue; } print_help(stderr); return 1; } return pack(trace_dir, flags); } } // namespace rr rr-5.7.0/src/PerfCounters.cc000066400000000000000000000731021450675474200157040ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "PerfCounters.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "Flags.h" #include "Session.h" #include "Task.h" #include "core.h" #include "kernel_metadata.h" #include "log.h" #include "util.h" using namespace std; namespace rr { #define PERF_COUNT_RR 0x72727272L static bool attributes_initialized; // At some point we might support multiple kinds of ticks for the same CPU arch. // At that point this will need to become more complicated. struct perf_event_attrs { // bug_flags is an architecture dependent flags to determine // what bugs need to be checked. // Current, this is simply the uarch on x86 and unused on aarch64. 
int bug_flags = 0; perf_event_attr ticks{}; perf_event_attr minus_ticks{}; perf_event_attr cycles{}; perf_event_attr llsc_fail{}; const char *pmu_name = nullptr; uint32_t pmu_flags = 0; uint32_t skid_size = 0; bool checked = false; bool has_ioc_period_bug = false; bool only_one_counter = false; bool activate_useless_counter = false; }; // If this contains more than one element, it's indexed by the CPU index. static std::vector perf_attrs; static uint32_t pmu_semantics_flags; /* * Find out the cpu model using the cpuid instruction. * Full list of CPUIDs at http://sandpile.org/x86/cpuid.htm * Another list at * http://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers */ enum CpuMicroarch { UnknownCpu, FirstIntel, IntelMerom = FirstIntel, IntelPenryn, IntelNehalem, IntelWestmere, IntelSandyBridge, IntelIvyBridge, IntelHaswell, IntelBroadwell, IntelSkylake, IntelSilvermont, IntelGoldmont, IntelKabylake, IntelCometlake, IntelIcelake, IntelTigerlake, IntelRocketlake, IntelAlderlake, IntelRaptorlake, IntelSapphireRapid, LastIntel = IntelSapphireRapid, FirstAMD, AMDF15R30 = FirstAMD, AMDZen, LastAMD = AMDZen, FirstARM, ARMNeoverseN1 = FirstARM, ARMNeoverseE1, ARMNeoverseV1, ARMNeoverseN2, ARMCortexA55, ARMCortexA75, ARMCortexA76, ARMCortexA77, ARMCortexA78, ARMCortexX1, AppleM1Icestorm, AppleM1Firestorm, AppleM2Blizzard, AppleM2Avalanche, LastARM = AppleM2Avalanche, }; /* * Set if this CPU supports ticks counting retired conditional branches. */ #define PMU_TICKS_RCB (1<<0) /* * Some CPUs turn off the whole PMU when there are no remaining events * scheduled (perhaps as a power consumption optimization). This can be a * very expensive operation, and is thus best avoided. For cpus, where this * is a problem, we keep a cycles counter (which corresponds to one of the * fixed function counters, so we don't use up a programmable PMC) that we * don't otherwise use, but keeps the PMU active, greatly increasing * performance. */ #define PMU_BENEFITS_FROM_USELESS_COUNTER (1<<1) /* * Set if this CPU supports ticks counting all taken branches * (excluding interrupts, far branches, and rets). */ #define PMU_TICKS_TAKEN_BRANCHES (1<<3) struct PmuConfig { CpuMicroarch uarch; const char* name; unsigned rcb_cntr_event; unsigned minus_ticks_cntr_event; unsigned llsc_cntr_event; uint32_t skid_size; uint32_t flags; const char* pmu_name = nullptr; // ARM only unsigned cycle_event = PERF_COUNT_HW_CPU_CYCLES; int cycle_type = PERF_TYPE_HARDWARE; int event_type = PERF_TYPE_RAW; }; // XXX please only edit this if you really know what you're doing. // event = 0x5101c4: // - 51 = generic PMU // - 01 = umask for event BR_INST_RETIRED.CONDITIONAL // - c4 = eventsel for event BR_INST_RETIRED.CONDITIONAL // event = 0x5301cb: // - 51 = generic PMU // - 01 = umask for event HW_INTERRUPTS.RECEIVED // - cb = eventsel for event HW_INTERRUPTS.RECEIVED // See Intel 64 and IA32 Architectures Performance Monitoring Events. // See check_events from libpfm4. 
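//
// A sketch of how such a raw config value can be assembled for a
// perf_event_attr, following the umask/eventsel packing described in the
// comment above (0x51 selecting the generic PMU):
//
//   unsigned umask = 0x01, eventsel = 0xc4;  // BR_INST_RETIRED.CONDITIONAL
//   uint64_t config = 0x510000 | (umask << 8) | eventsel;  // == 0x5101c4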
static const PmuConfig pmu_configs[] = { { IntelSapphireRapid, "Intel SapphireRapid", 0x5111c4, 0, 0, 125, PMU_TICKS_RCB }, { IntelRaptorlake, "Intel Raptorlake", 0x5111c4, 0, 0, 125, PMU_TICKS_RCB }, { IntelAlderlake, "Intel Alderlake", 0x5111c4, 0, 0, 125, PMU_TICKS_RCB }, { IntelRocketlake, "Intel Rocketlake", 0x5111c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelTigerlake, "Intel Tigerlake", 0x5111c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelIcelake, "Intel Icelake", 0x5111c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelCometlake, "Intel Cometlake", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelKabylake, "Intel Kabylake", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelSilvermont, "Intel Silvermont", 0x517ec4, 0, 0, 100, PMU_TICKS_RCB }, { IntelGoldmont, "Intel Goldmont", 0x517ec4, 0, 0, 100, PMU_TICKS_RCB }, { IntelSkylake, "Intel Skylake", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelBroadwell, "Intel Broadwell", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelHaswell, "Intel Haswell", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelIvyBridge, "Intel Ivy Bridge", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelSandyBridge, "Intel Sandy Bridge", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelNehalem, "Intel Nehalem", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelWestmere, "Intel Westmere", 0x5101c4, 0, 0, 100, PMU_TICKS_RCB }, { IntelPenryn, "Intel Penryn", 0, 0, 0, 100, 0 }, { IntelMerom, "Intel Merom", 0, 0, 0, 100, 0 }, { AMDF15R30, "AMD Family 15h Revision 30h", 0xc4, 0xc6, 0, 250, PMU_TICKS_TAKEN_BRANCHES }, // 0xd1 == RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS - Number of retired conditional branch instructions // 0x2c == INTERRUPT_TAKEN - Counts the number of interrupts taken // Both counters are available on Zen, Zen+ and Zen2. { AMDZen, "AMD Zen", 0x5100d1, 0, 0, 10000, PMU_TICKS_RCB }, // Performance cores from ARM from cortex-a76 on (including neoverse-n1 and later) // have the following counters that are reliable enough for us. // 0x21 == BR_RETIRED - Architecturally retired taken branches // 0x6F == STREX_SPEC - Speculatively executed strex instructions // 0x11 == CPU_CYCLES - Cycle { ARMNeoverseN1, "ARM Neoverse N1", 0x21, 0, 0x6F, 1000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3_0", 0x11, -1, -1 }, { ARMNeoverseV1, "ARM Neoverse V1", 0x21, 0, 0x6F, 1000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3_0", 0x11, -1, -1 }, { ARMNeoverseN2, "ARM Neoverse N2", 0x21, 0, 0x6F, 1000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3_0", 0x11, -1, -1 }, { ARMCortexA76, "ARM Cortex A76", 0x21, 0, 0x6F, 10000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3", 0x11, -1, -1 }, { ARMCortexA77, "ARM Cortex A77", 0x21, 0, 0x6F, 10000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3", 0x11, -1, -1 }, { ARMCortexA78, "ARM Cortex A78", 0x21, 0, 0x6F, 10000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3", 0x11, -1, -1 }, { ARMCortexX1, "ARM Cortex X1", 0x21, 0, 0x6F, 10000, PMU_TICKS_TAKEN_BRANCHES, "armv8_pmuv3", 0x11, -1, -1 }, // cortex-a55, cortex-a75 and neoverse-e1 counts uarch ISB // as retired branches so the BR_RETIRED counter is not reliable. // There are some counters that are somewhat more reliable than // the total branch count (0x21) including // 0x0D (BR_IMMED_RETIRED) 0x0E (BR_RETURN_RETIRED) // 0xCD (BR_INDIRECT_ADDR_PRED) 0x76 (PC_WRITE_SPEC) // 0x78 (BR_IMMED_SPEC), 0xC9 (BR_COND_PRED) // 0xCD (BR_INDIRECT_ADDR_PRED) // but according to tests on the LITTLE core on a snapdragon 865 // none of them (including the sums) seems to be useful/reliable enough. 
{ ARMNeoverseE1, "ARM Neoverse E1", 0, 0, 0, 0, 0 }, { ARMCortexA55, "ARM Cortex A55", 0, 0, 0, 0, 0 }, { ARMCortexA75, "ARM Cortex A75", 0, 0, 0, 0, 0 }, { AppleM1Icestorm, "Apple M1 Icestorm", 0x90, 0, 0, 1000, PMU_TICKS_TAKEN_BRANCHES, "apple_icestorm_pmu", 0x8c, -1, -1 }, { AppleM1Firestorm, "Apple M1 Firestorm", 0x90, 0, 0, 1000, PMU_TICKS_TAKEN_BRANCHES, "apple_firestorm_pmu", 0x8c, -1, -1 }, { AppleM2Blizzard, "Apple M2 Blizzard", 0x90, 0, 0, 1000, PMU_TICKS_TAKEN_BRANCHES, "apple_blizzard_pmu", 0x8c, -1, -1 }, { AppleM2Avalanche, "Apple M2 Avalanche", 0x90, 0, 0, 1000, PMU_TICKS_TAKEN_BRANCHES, "apple_avalanche_pmu", 0x8c, -1, -1 }, }; #define RR_SKID_MAX 10000 static string lowercase(const string& s) { string c = s; transform(c.begin(), c.end(), c.begin(), ::tolower); return c; } // The index of the PMU we are using within perf_attrs. // This is always 0 if we detected a single PMU type // and will be the same as the CPU index if we detected multiple PMU types. static int get_pmu_index(int cpu_binding) { if (cpu_binding < 0) { if (perf_attrs.size() > 1) { CLEAN_FATAL() << "\nMultiple PMU types detected. Unbinding CPU is not supported."; } return 0; } if (!PerfCounters::support_cpu(cpu_binding)) { CLEAN_FATAL() << "\nPMU on cpu " << cpu_binding << " is not supported."; } if (perf_attrs.size() == 1) { // Single PMU type. return 0; } if ((size_t)cpu_binding > perf_attrs.size()) { CLEAN_FATAL() << "\nUnable to find PMU type for CPU " << cpu_binding; } return cpu_binding; } static void init_perf_event_attr(struct perf_event_attr* attr, unsigned type, unsigned config) { memset(attr, 0, sizeof(*attr)); attr->type = perf_type_id(type); attr->size = sizeof(*attr); attr->config = config; // rr requires that its events count userspace tracee code // only. attr->exclude_kernel = 1; attr->exclude_guest = 1; } static const uint64_t IN_TX = 1ULL << 32; static const uint64_t IN_TXCP = 1ULL << 33; static int64_t read_counter(ScopedFd& fd) { int64_t val; ssize_t nread = read(fd, &val, sizeof(val)); DEBUG_ASSERT(nread == sizeof(val)); return val; } static ScopedFd start_counter(pid_t tid, int group_fd, struct perf_event_attr* attr, bool* disabled_txcp = nullptr) { if (disabled_txcp) { *disabled_txcp = false; } attr->pinned = group_fd == -1; int fd = syscall(__NR_perf_event_open, attr, tid, -1, group_fd, PERF_FLAG_FD_CLOEXEC); if (0 >= fd && errno == EINVAL && attr->type == PERF_TYPE_RAW && (attr->config & IN_TXCP)) { // The kernel might not support IN_TXCP, so try again without it. struct perf_event_attr tmp_attr = *attr; tmp_attr.config &= ~IN_TXCP; fd = syscall(__NR_perf_event_open, &tmp_attr, tid, -1, group_fd, PERF_FLAG_FD_CLOEXEC); if (fd >= 0) { if (disabled_txcp) { *disabled_txcp = true; } LOG(warn) << "kernel does not support IN_TXCP"; if ((cpuid(CPUID_GETEXTENDEDFEATURES, 0).ebx & HLE_FEATURE_FLAG) && !Flags::get().suppress_environment_warnings) { fprintf(stderr, "Your CPU supports Hardware Lock Elision but your kernel does\n" "not support setting the IN_TXCP PMU flag. Record and replay\n" "of code that uses HLE will fail unless you update your\n" "kernel.\n"); } } } if (0 >= fd) { if (errno == EACCES) { CLEAN_FATAL() << "Permission denied to use 'perf_event_open'; are hardware perf events " "available? See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system"; } if (errno == ENOENT) { CLEAN_FATAL() << "Unable to open performance counter with 'perf_event_open'; " "are hardware perf events available? 
See https://github.com/rr-debugger/rr/wiki/Will-rr-work-on-my-system"; } FATAL() << "Failed to initialize counter"; } return ScopedFd(fd); } static void check_for_ioc_period_bug(perf_event_attrs &perf_attr) { // Start a cycles counter struct perf_event_attr attr = perf_attr.ticks; attr.sample_period = 0xffffffff; attr.exclude_kernel = 1; ScopedFd bug_fd = start_counter(0, -1, &attr); uint64_t new_period = 1; if (ioctl(bug_fd, PERF_EVENT_IOC_PERIOD, &new_period)) { FATAL() << "ioctl(PERF_EVENT_IOC_PERIOD) failed"; } struct pollfd poll_bug_fd = {.fd = bug_fd, .events = POLL_IN, .revents = 0 }; poll(&poll_bug_fd, 1, 0); perf_attr.has_ioc_period_bug = poll_bug_fd.revents == 0; LOG(debug) << "has_ioc_period_bug=" << perf_attr.has_ioc_period_bug; } static const int NUM_BRANCHES = 500; volatile uint32_t accumulator_sink = 0; static void do_branches() { // Do NUM_BRANCHES conditional branches that can't be optimized out. // 'accumulator' is always odd and can't be zero uint32_t accumulator = uint32_t(rand()) * 2 + 1; for (int i = 0; i < NUM_BRANCHES && accumulator; ++i) { accumulator = ((accumulator * 7) + 2) & 0xffffff; } // Use 'accumulator' so it can't be optimized out. accumulator_sink = accumulator; } // Architecture specific detection code #if defined(__i386__) || defined(__x86_64__) #include "PerfCounters_x86.h" #elif defined(__aarch64__) #include "PerfCounters_aarch64.h" #else #error Must define microarchitecture detection code for this architecture #endif static void check_working_counters(perf_event_attrs &perf_attr) { struct perf_event_attr attr = perf_attr.ticks; attr.sample_period = 0; struct perf_event_attr attr2 = perf_attr.cycles; attr2.sample_period = 0; ScopedFd fd = start_counter(0, -1, &attr); ScopedFd fd2 = start_counter(0, -1, &attr2); do_branches(); int64_t events = read_counter(fd); int64_t events2 = read_counter(fd2); if (events < NUM_BRANCHES) { char config[100]; sprintf(config, "%llx", (long long)perf_attr.ticks.config); std::string perf_cmdline = "perf stat -e "; if (perf_attr.pmu_name) { perf_cmdline = perf_cmdline + perf_attr.pmu_name + "/r" + config + "/ true"; } else { perf_cmdline = perf_cmdline + "r" + config + " true"; } FATAL() << "\nGot " << events << " branch events, expected at least " << NUM_BRANCHES << ".\n" "\nThe hardware performance counter seems to not be working. Check\n" "that hardware performance counters are working by running\n" " " << perf_cmdline << "\n" "and checking that it reports a nonzero number of events.\n" "If performance counters seem to be working with 'perf', file an\n" "rr issue, otherwise check your hardware/OS/VM configuration. 
Also\n" "check that other software is not using performance counters on\n" "this CPU."; } perf_attr.only_one_counter = events2 == 0; LOG(debug) << "only_one_counter=" << perf_attr.only_one_counter; if (perf_attr.only_one_counter) { arch_check_restricted_counter(); } } static void check_for_bugs(perf_event_attrs &perf_attr) { DEBUG_ASSERT(!running_under_rr()); check_for_ioc_period_bug(perf_attr); check_working_counters(perf_attr); check_for_arch_bugs(perf_attr); } static std::vector get_cpu_microarchs() { string forced_uarch = lowercase(Flags::get().forced_uarch); if (!forced_uarch.empty()) { for (size_t i = 0; i < array_length(pmu_configs); ++i) { const PmuConfig& pmu = pmu_configs[i]; string name = lowercase(pmu.name); if (name.npos != name.find(forced_uarch)) { LOG(info) << "Using forced uarch " << pmu.name; return { pmu.uarch }; } } CLEAN_FATAL() << "Forced uarch " << Flags::get().forced_uarch << " isn't known."; } return compute_cpu_microarchs(); } // Similar to rr::perf_attrs, if this contains more than one element, // it's indexed by the CPU index. static std::vector get_pmu_microarchs() { std::vector pmu_uarchs; auto uarchs = get_cpu_microarchs(); bool found_working_pmu = false; for (auto uarch : uarchs) { bool found = false; for (size_t i = 0; i < array_length(pmu_configs); ++i) { if (uarch == pmu_configs[i].uarch) { found = true; if (pmu_configs[i].flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES)) { found_working_pmu |= true; } pmu_uarchs.push_back(pmu_configs[i]); break; } } DEBUG_ASSERT(found); } if (!found_working_pmu) { CLEAN_FATAL() << "No supported microarchitectures found."; } DEBUG_ASSERT(!pmu_uarchs.empty()); // Note that the `uarch` field after processed by `post_init_pmu_uarchs` // is used to store the bug_flags and may not be the actual uarch. 
post_init_pmu_uarchs(pmu_uarchs); return pmu_uarchs; } static void init_attributes() { if (attributes_initialized) { return; } attributes_initialized = true; auto pmu_uarchs = get_pmu_microarchs(); pmu_semantics_flags = PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES; for (auto &pmu_uarch : pmu_uarchs) { if (!(pmu_uarch.flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES))) { continue; } pmu_semantics_flags = pmu_semantics_flags & pmu_uarch.flags; } if (!(pmu_semantics_flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES))) { if (pmu_uarchs.size() == 1) { FATAL() << "Microarchitecture `" << pmu_uarchs[0].name << "' currently unsupported."; } else { std::string uarch_list; for (auto &pmu_uarch : pmu_uarchs) { uarch_list += "\n "; uarch_list += pmu_uarch.name; } FATAL() << "Microarchitecture combination currently unsupported:" << uarch_list; } } if (running_under_rr()) { perf_attrs.resize(1); init_perf_event_attr(&perf_attrs[0].ticks, PERF_TYPE_HARDWARE, PERF_COUNT_RR); perf_attrs[0].skid_size = RR_SKID_MAX; perf_attrs[0].pmu_flags = pmu_semantics_flags; } else { auto npmus = pmu_uarchs.size(); perf_attrs.resize(npmus); for (size_t i = 0; i < npmus; i++) { auto &perf_attr = perf_attrs[i]; auto &pmu_uarch = pmu_uarchs[i]; if (!(pmu_uarch.flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES))) { perf_attr.pmu_flags = 0; // Mark as unsupported continue; } perf_attr.pmu_name = pmu_uarch.pmu_name; perf_attr.skid_size = pmu_uarch.skid_size; perf_attr.pmu_flags = pmu_uarch.flags; perf_attr.bug_flags = (int)pmu_uarch.uarch; init_perf_event_attr(&perf_attr.ticks, pmu_uarch.event_type, pmu_uarch.rcb_cntr_event); if (pmu_uarch.minus_ticks_cntr_event != 0) { init_perf_event_attr(&perf_attr.minus_ticks, pmu_uarch.event_type, pmu_uarch.minus_ticks_cntr_event); } init_perf_event_attr(&perf_attr.cycles, pmu_uarch.cycle_type, pmu_uarch.cycle_event); init_perf_event_attr(&perf_attr.llsc_fail, pmu_uarch.event_type, pmu_uarch.llsc_cntr_event); } } } bool PerfCounters::support_cpu(int cpu) { // We could probably make cpu=-1 mean whether all CPUs are supported // if there's a need for it... DEBUG_ASSERT(cpu >= 0); init_attributes(); auto nattrs = (int)perf_attrs.size(); if (nattrs == 1) { cpu = 0; } if (cpu >= nattrs) { return false; } auto &perf_attr = perf_attrs[cpu]; return perf_attr.pmu_flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES); } static void check_pmu(int pmu_index) { auto &perf_attr = perf_attrs[pmu_index]; if (perf_attr.checked) { return; } perf_attr.checked = true; // Under rr we emulate idealized performance counters, so we can assume // none of the bugs apply. if (running_under_rr()) { return; } check_for_bugs(perf_attr); /* * For maintainability, and since it doesn't impact performance when not * needed, we always activate this. If it ever turns out to be a problem, * this can be set to pmu->flags & PMU_BENEFITS_FROM_USELESS_COUNTER, * instead. * * We also disable this counter when running under rr. Even though it's the * same event for the same task as the outer rr, the linux kernel does not * coalesce them and tries to schedule the new one on a general purpose PMC. * On CPUs with only 2 general PMCs (e.g. KNL), we'd run out. 
*/ perf_attr.activate_useless_counter = perf_attr.has_ioc_period_bug; } bool PerfCounters::is_rr_ticks_attr(const perf_event_attr& attr) { return attr.type == PERF_TYPE_HARDWARE && attr.config == PERF_COUNT_RR; } bool PerfCounters::supports_ticks_semantics(TicksSemantics ticks_semantics) { init_attributes(); switch (ticks_semantics) { case TICKS_RETIRED_CONDITIONAL_BRANCHES: return (pmu_semantics_flags & PMU_TICKS_RCB) != 0; case TICKS_TAKEN_BRANCHES: return (pmu_semantics_flags & PMU_TICKS_TAKEN_BRANCHES) != 0; default: FATAL() << "Unknown ticks_semantics " << ticks_semantics; return false; } } TicksSemantics PerfCounters::default_ticks_semantics() { init_attributes(); if (pmu_semantics_flags & PMU_TICKS_TAKEN_BRANCHES) { return TICKS_TAKEN_BRANCHES; } if (pmu_semantics_flags & PMU_TICKS_RCB) { return TICKS_RETIRED_CONDITIONAL_BRANCHES; } FATAL() << "Unsupported architecture"; return TICKS_TAKEN_BRANCHES; } uint32_t PerfCounters::skid_size() { DEBUG_ASSERT(attributes_initialized); DEBUG_ASSERT(perf_attrs[pmu_index].checked); return perf_attrs[pmu_index].skid_size; } PerfCounters::PerfCounters(pid_t tid, int cpu_binding, TicksSemantics ticks_semantics, bool enable) : tid(tid), pmu_index(get_pmu_index(cpu_binding)), ticks_semantics_(ticks_semantics), enable(enable), started(false), counting(false) { if (!supports_ticks_semantics(ticks_semantics)) { FATAL() << "Ticks semantics " << ticks_semantics << " not supported"; } } static void make_counter_async(ScopedFd& fd, int signal) { if (fcntl(fd, F_SETFL, O_ASYNC) || fcntl(fd, F_SETSIG, signal)) { FATAL() << "Failed to make ticks counter ASYNC with sig" << signal_name(signal); } } void PerfCounters::reset(Ticks ticks_period) { if (!enable) { return; } DEBUG_ASSERT(ticks_period >= 0); check_pmu(pmu_index); auto &perf_attr = perf_attrs[pmu_index]; if (ticks_period == 0 && !always_recreate_counters(perf_attr)) { // We can't switch a counter between sampling and non-sampling via // PERF_EVENT_IOC_PERIOD so just turn 0 into a very big number. ticks_period = uint64_t(1) << 60; } if (!started) { LOG(debug) << "Recreating counters with period " << ticks_period; struct perf_event_attr attr = perf_attr.ticks; struct perf_event_attr minus_attr = perf_attr.minus_ticks; attr.sample_period = ticks_period; fd_ticks_interrupt = start_counter(tid, -1, &attr); if (minus_attr.config != 0) { fd_minus_ticks_measure = start_counter(tid, fd_ticks_interrupt, &minus_attr); } if (!perf_attr.only_one_counter && !running_under_rr()) { reset_arch_extras(); } if (perf_attr.activate_useless_counter && !fd_useless_counter.is_open()) { // N.B.: This is deliberately not in the same group as the other counters // since we want to keep it scheduled at all times. 
fd_useless_counter = start_counter(tid, -1, &perf_attr.cycles); } struct f_owner_ex own; own.type = F_OWNER_TID; own.pid = tid; if (fcntl(fd_ticks_interrupt, F_SETOWN_EX, &own)) { FATAL() << "Failed to SETOWN_EX ticks event fd"; } make_counter_async(fd_ticks_interrupt, PerfCounters::TIME_SLICE_SIGNAL); } else { LOG(debug) << "Resetting counters with period " << ticks_period; if (ioctl(fd_ticks_interrupt, PERF_EVENT_IOC_RESET, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_RESET) failed"; } if (ioctl(fd_ticks_interrupt, PERF_EVENT_IOC_PERIOD, &ticks_period)) { FATAL() << "ioctl(PERF_EVENT_IOC_PERIOD) failed with period " << ticks_period; } if (ioctl(fd_ticks_interrupt, PERF_EVENT_IOC_ENABLE, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_ENABLE) failed"; } if (fd_minus_ticks_measure.is_open()) { if (ioctl(fd_minus_ticks_measure, PERF_EVENT_IOC_RESET, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_RESET) failed"; } if (ioctl(fd_minus_ticks_measure, PERF_EVENT_IOC_ENABLE, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_ENABLE) failed"; } } if (fd_ticks_measure.is_open()) { if (ioctl(fd_ticks_measure, PERF_EVENT_IOC_RESET, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_RESET) failed"; } if (ioctl(fd_ticks_measure, PERF_EVENT_IOC_ENABLE, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_ENABLE) failed"; } } if (fd_ticks_in_transaction.is_open()) { if (ioctl(fd_ticks_in_transaction, PERF_EVENT_IOC_RESET, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_RESET) failed"; } if (ioctl(fd_ticks_in_transaction, PERF_EVENT_IOC_ENABLE, 0)) { FATAL() << "ioctl(PERF_EVENT_IOC_ENABLE) failed"; } } } started = true; counting = true; counting_period = ticks_period; } void PerfCounters::set_tid(pid_t tid) { stop(); this->tid = tid; } void PerfCounters::stop() { if (!started) { return; } started = false; fd_ticks_interrupt.close(); fd_ticks_measure.close(); fd_minus_ticks_measure.close(); fd_useless_counter.close(); fd_ticks_in_transaction.close(); } void PerfCounters::stop_counting() { if (!counting) { return; } counting = false; if (always_recreate_counters(perf_attrs[pmu_index])) { stop(); } else { ioctl(fd_ticks_interrupt, PERF_EVENT_IOC_DISABLE, 0); if (fd_minus_ticks_measure.is_open()) { ioctl(fd_minus_ticks_measure, PERF_EVENT_IOC_DISABLE, 0); } if (fd_ticks_measure.is_open()) { ioctl(fd_ticks_measure, PERF_EVENT_IOC_DISABLE, 0); } if (fd_ticks_in_transaction.is_open()) { ioctl(fd_ticks_in_transaction, PERF_EVENT_IOC_DISABLE, 0); } } } // Note that on aarch64 this is also used to get the count for `ret` Ticks PerfCounters::ticks_for_unconditional_indirect_branch(Task*) { DEBUG_ASSERT(attributes_initialized); return (pmu_semantics_flags & PMU_TICKS_TAKEN_BRANCHES) ? 1 : 0; } Ticks PerfCounters::ticks_for_unconditional_direct_branch(Task*) { DEBUG_ASSERT(attributes_initialized); return (pmu_semantics_flags & PMU_TICKS_TAKEN_BRANCHES) ? 1 : 0; } Ticks PerfCounters::ticks_for_direct_call(Task*) { DEBUG_ASSERT(attributes_initialized); return (pmu_semantics_flags & PMU_TICKS_TAKEN_BRANCHES) ? 1 : 0; } Ticks PerfCounters::read_ticks(Task* t) { if (!started || !counting) { return 0; } if (fd_ticks_in_transaction.is_open()) { uint64_t transaction_ticks = read_counter(fd_ticks_in_transaction); if (transaction_ticks > 0) { LOG(debug) << transaction_ticks << " IN_TX ticks detected"; if (!Flags::get().force_things) { ASSERT(t, false) << transaction_ticks << " IN_TX ticks detected while HLE not supported due to KVM PMU\n" "virtualization bug. See " "http://marc.info/?l=linux-kernel&m=148582794808419&w=2\n" "Aborting. 
Retry with -F to override, but it will probably\n" "fail."; } } } if (fd_strex_counter.is_open()) { uint64_t strex_count = read_counter(fd_strex_counter); if (strex_count > 0) { LOG(debug) << strex_count << " strex detected"; if (!Flags::get().force_things) { ASSERT(t, false) << strex_count << " (speculatively) executed strex instructions detected. \n" "On aarch64, rr only supports applications making use of LSE\n" "atomics rather than legacy LL/SC-based atomics.\n" "Aborting. Retry with -F to override, but replaying such\n" "a recording will probably fail."; } } } uint64_t adjusted_counting_period = counting_period + (t->session().is_recording() ? recording_skid_size() : skid_size()); uint64_t interrupt_val = read_counter(fd_ticks_interrupt); if (!fd_ticks_measure.is_open()) { if (fd_minus_ticks_measure.is_open()) { uint64_t minus_measure_val = read_counter(fd_minus_ticks_measure); interrupt_val -= minus_measure_val; } if (t->session().is_recording()) { if (counting_period && interrupt_val > adjusted_counting_period) { LOG(warn) << "Recorded ticks of " << interrupt_val << " overshot requested ticks target by " << interrupt_val - counting_period << " ticks.\n" "On AMD systems this is known to occur occasionally for unknown reasons.\n" "Recording should continue normally. Please report any unexpected rr failures\n" "received after this warning, any conditions that reliably reproduce it,\n" "or sightings of this warning on non-AMD systems."; } } else { ASSERT(t, !counting_period || interrupt_val <= adjusted_counting_period) << "Detected " << interrupt_val << " ticks, expected no more than " << adjusted_counting_period; } return interrupt_val; } uint64_t measure_val = read_counter(fd_ticks_measure); if (measure_val > interrupt_val) { // There is some kind of kernel or hardware bug that means we sometimes // see more events with IN_TXCP set than without. These are clearly // spurious events :-(. For now, work around it by returning the // interrupt_val. That will work if HLE hasn't been used in this interval. // Note that interrupt_val > measure_val is valid behavior (when HLE is // being used). LOG(debug) << "Measured too many ticks; measure=" << measure_val << ", interrupt=" << interrupt_val; ASSERT(t, !counting_period || interrupt_val <= adjusted_counting_period) << "Detected " << interrupt_val << " ticks, expected no more than " << adjusted_counting_period; return interrupt_val; } ASSERT(t, !counting_period || measure_val <= adjusted_counting_period) << "Detected " << measure_val << " ticks, expected no more than " << adjusted_counting_period; return measure_val; } } // namespace rr rr-5.7.0/src/PerfCounters.h000066400000000000000000000110251450675474200155420ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PERF_COUNTERS_H_ #define RR_PERF_COUNTERS_H_ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif #include #include #include #include "ScopedFd.h" #include "Ticks.h" struct perf_event_attr; namespace rr { class Task; enum TicksSemantics { TICKS_RETIRED_CONDITIONAL_BRANCHES, TICKS_TAKEN_BRANCHES, }; /** * A class encapsulating the performance counters we use to monitor * each task during recording and replay. * * Normally we monitor a single kind of event that we use as a proxy * for progress, which we call "ticks". Currently this is the count of retired * conditional branches. We support dispatching a signal when the counter * reaches a particular value. 
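 *
 * A minimal usage sketch (hypothetical values; error handling omitted):
 *
 *   // tid: tracee thread, bound to CPU 0, counters enabled
 *   PerfCounters counters(tid, 0, PerfCounters::default_ticks_semantics(),
 *                         true);
 *   counters.reset(50000);  // request an interrupt after ~50000 ticks
 *   // ... let the task run; TIME_SLICE_SIGNAL fires near the target ...
 *   Ticks elapsed = counters.read_ticks(t);
 *   counters.stop();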
* * When extra_perf_counters_enabled() returns true, we monitor additional * counters of interest. */ class PerfCounters { public: /** * Create performance counters monitoring the given task. * When enable is false, we always report 0 and don't do any interrupts. */ PerfCounters(pid_t tid, int cpu_binding, TicksSemantics ticks_semantics, bool enable); ~PerfCounters() { stop(); } void set_tid(pid_t tid); /** * Reset all counter values to 0 and program the counters to send * TIME_SLICE_SIGNAL when 'ticks_period' tick events have elapsed. (In reality * the hardware triggers its interrupt some time after that. We also allow * the interrupt to fire early.) * This must be called while the task is stopped, and it must be called * before the task is allowed to run again. * `ticks_period` of zero means don't interrupt at all. */ void reset(Ticks ticks_period); template void reset_arch_extras(); /** * Close the perfcounter fds. They will be automatically reopened if/when * reset is called again. */ void stop(); /** * Suspend counting until the next reset. This may or may not actually stop * the performance counters, depending on whether or not this is required * for correctness on this kernel version. */ void stop_counting(); /** * Return the number of ticks we need for an emulated branch. */ static Ticks ticks_for_unconditional_indirect_branch(Task*); /** * Return the number of ticks we need for an emulated direct branch. */ static Ticks ticks_for_unconditional_direct_branch(Task*); /** * Return the number of ticks we need for a direct call. */ static Ticks ticks_for_direct_call(Task*); /** * Whether PMU on core i is supported. */ static bool support_cpu(int cpu); /** * Read the current value of the ticks counter. * `t` is used for debugging purposes. */ Ticks read_ticks(Task* t); /** * Returns what ticks mean for these counters. */ TicksSemantics ticks_semantics() const { return ticks_semantics_; } /** * Return the fd we last used to generate the ticks-counter signal. */ int ticks_interrupt_fd() const { return fd_ticks_interrupt.get(); } /* This choice is fairly arbitrary; linux doesn't use SIGSTKFLT so we * hope that tracees don't either. */ enum { TIME_SLICE_SIGNAL = SIGSTKFLT }; static bool is_rr_ticks_attr(const perf_event_attr& attr); static bool supports_ticks_semantics(TicksSemantics ticks_semantics); static TicksSemantics default_ticks_semantics(); /** * When an interrupt is requested, at most this many ticks may elapse before * the interrupt is delivered. */ uint32_t skid_size(); /** * Use a separate skid_size for recording since we seem to see more skid * in practice during recording, in particular during the * async_signal_syscalls tests */ uint32_t recording_skid_size() { return skid_size() * 5; } private: // Only valid while 'counting' is true Ticks counting_period; pid_t tid; int pmu_index; // We use separate fds for counting ticks and for generating interrupts. The // former ignores ticks in aborted transactions, and does not support // sample_period; the latter does not ignore ticks in aborted transactions, // but does support sample_period. 
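  // For instance, on x86 the measuring counter's .config may OR in the
  // IN_TXCP bit (1ULL << 33, see PerfCounters.cc) so that ticks from
  // aborted HLE transactions are excluded; that variant cannot also drive
  // the sampling interrupt, hence the separate fds below.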
ScopedFd fd_ticks_measure; ScopedFd fd_minus_ticks_measure; ScopedFd fd_ticks_interrupt; ScopedFd fd_useless_counter; // x86(_64) specific counter to support recording HLE ScopedFd fd_ticks_in_transaction; // aarch64 specific counter to detect use of ll/sc instructions ScopedFd fd_strex_counter; TicksSemantics ticks_semantics_; bool enable; bool started; bool counting; }; } // namespace rr #endif /* RR_PERF_COUNTERS_H_ */ rr-5.7.0/src/PerfCounters_aarch64.h000066400000000000000000000263241450675474200170620ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ // This file is included from PerfCounters.cc struct CPUID { uint8_t implementer = 0; uint8_t variant = 0; uint16_t part = 0; operator bool() const { return implementer || variant || part; } // bool operator==(const CPUID&) const = default; // c++20 bool operator==(const CPUID &other) const { return implementer == other.implementer && variant == other.variant && part == other.part; } bool operator!=(const CPUID &other) const { return !(*this == other); } }; static std::ostream &operator<<(std::ostream &stm, const CPUID &cpuid) { stm << std::hex << "implementer: 0x" << int(cpuid.implementer) << ", variant: 0x" << int(cpuid.variant) << " part: 0x" << int(cpuid.part); return stm; } /** * Return the detected, known microarchitecture of this CPU, or don't * return; i.e. never return UnknownCpu. */ static CpuMicroarch compute_cpu_microarch(const CPUID &cpuid) { switch (cpuid.implementer) { case 0x41: // ARM switch (cpuid.part) { case 0xd05: return ARMCortexA55; case 0xd0a: return ARMCortexA75; case 0xd0b: return ARMCortexA76; case 0xd0c: return ARMNeoverseN1; case 0xd0d: return ARMCortexA77; case 0xd40: return ARMNeoverseV1; case 0xd41: case 0xd4b: // ARM Cortex A78C return ARMCortexA78; case 0xd44: case 0xd4c: // ARM Cortex X1C return ARMCortexX1; case 0xd49: return ARMNeoverseN2; case 0xd4a: return ARMNeoverseE1; } break; case 0x51: // Qualcomm switch (cpuid.part) { case 0x802: return ARMCortexA75; case 0x803: return ARMCortexA55; case 0x804: return ARMCortexA76; case 0x805: return ARMCortexA55; } break; case 0x61: // Apple switch (cpuid.part) { case 0x22: case 0x24: case 0x28: return AppleM1Icestorm; case 0x23: case 0x25: case 0x29: return AppleM1Firestorm; case 0x32: return AppleM2Blizzard; case 0x33: return AppleM2Avalanche; } break; } CLEAN_FATAL() << "Unknown aarch64 CPU type " << cpuid; return UnknownCpu; // not reached } static void set_cpuid(std::vector &cpuids, unsigned long cpuidx, CPUID cpuid) { if (cpuids.size() <= cpuidx) { cpuids.resize(cpuidx + 1); } if (cpuids[cpuidx]) { CLEAN_FATAL() << "Duplicated CPUID for core " << cpuidx; } cpuids[cpuidx] = cpuid; } /** * The new interface to get ID register values on AArch64 * `/sys/devices/system/cpu/cpu([0-9]+)/regs/identification/midr_el1` * The register value is stored in hex. 
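 *
 * For example (hypothetical register value): a midr_el1 of 0x413fd0c1
 * decodes, with the shifts used in get_cpuinfo_sysfs() below, to
 *
 *   implementer = 0x41 (ARM), variant = 0x3, part = 0xd0c (Neoverse N1)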
*/ static inline void get_cpuinfo_sysfs(std::vector &res) { const std::string cpu_dir = "/sys/devices/system/cpu/"; const std::regex cpuname_regex("cpu([0-9]+)"); auto dir = opendir(cpu_dir.c_str()); if (!dir) { return; } while (auto entry = readdir(dir)) { std::cmatch match; if (entry->d_type != DT_DIR || !std::regex_match(entry->d_name, match, cpuname_regex)) { continue; } auto cpuidx = std::stoul(match[1].str()); std::string name = cpu_dir + entry->d_name + "/regs/identification/midr_el1"; std::ifstream file(name); if (!file) { CLEAN_FATAL() << "Failed to read midr register from kernel"; } uint64_t val = 0; file >> std::hex >> val; if (!file) { CLEAN_FATAL() << "Failed to read midr register from kernel"; } set_cpuid(res, cpuidx, { uint8_t(val >> 24), uint8_t((val >> 20) & 0xf), uint16_t((val >> 4) & 0xfff) }); } closedir(dir); } /** * A line we care about in /proc/cpuinfo starts with a prefix followed by * `:` and some white space characters, then followed by the value we care about. * Return true if we've found the prefix. Set `flag` to `false` * if the value parsing failed. * * Use an external template since lambda's can't be templated in C++11 */ template static inline bool try_read_procfs_line(const std::string &line, const char *prefix, T &out, bool &flag, F &&reset) { size_t prefix_len = strlen(prefix); if (line.size() < prefix_len) { return false; } if (memcmp(&line[0], prefix, prefix_len) != 0) { return false; } if (flag) { // We've seen this already, // i.e. we didn't see a new line between the processor lines reset(); } const char *p = &line[prefix_len]; // Skip blank and `:`. while (*p == '\t' || *p == ' ' || *p == ':') { p++; } char *str_end; auto num = std::strtoull(p, &str_end, 0); out = (T)num; if (str_end == p) { flag = false; } else if (num > (unsigned long long)std::numeric_limits::max()) { flag = false; } else { flag = true; } return true; } /** * /proc/cpuinfo reader * The cpuinfo file contains blocks of text for each core. * The blocks are separated by empty lines and it should start with a * `processor : ` line followed by lines showing properties of the core. * The three property lines we are looking for starts with * `CPU implementer`, `CPU variant` and `CPU part`. */ static inline void get_cpuinfo_procfs(std::vector &res) { std::ifstream file("/proc/cpuinfo"); CPUID cpuid = {0, 0, 0}; unsigned cpuidx = 0; bool has_cpuidx = false; bool has_impl = false; bool has_part = false; bool has_var = false; auto reset = [&] () { // Few (none) of the detection code care about the variant number // so we'll accept it if we couldn't read it. if (has_cpuidx && has_impl && has_part) { set_cpuid(res, cpuidx, cpuid); } has_cpuidx = false; has_impl = false; has_part = false; has_var = false; cpuid = {0, 0, 0}; }; for (std::string line; std::getline(file, line);) { // Empty lines means that we've finished processing of a block if (line.empty()) { reset(); continue; } // First find the processor line if (try_read_procfs_line(line, "processor", cpuidx, has_cpuidx, reset)) { continue; } // and ignore the line until we found the processor line. if (!has_cpuidx) { continue; } // Try parsing as one of the data lines. // Short circuiting after the first hit. 
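    // A typical aarch64 /proc/cpuinfo block being matched here looks like
    // (values hypothetical):
    //
    //   processor       : 0
    //   CPU implementer : 0x41
    //   CPU variant     : 0x3
    //   CPU part        : 0xd0c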
try_read_procfs_line(line, "CPU implementer", cpuid.implementer, has_impl, reset) || try_read_procfs_line(line, "CPU variant", cpuid.variant, has_var, reset) || try_read_procfs_line(line, "CPU part", cpuid.part, has_part, reset); } reset(); } static std::vector compute_cpu_microarchs() { std::vector cpuids; get_cpuinfo_sysfs(cpuids); if (cpuids.empty()) { LOG(warn) << "Unable to read CPU type from sysfs, trying procfs instead."; get_cpuinfo_procfs(cpuids); } if (cpuids.empty()) { CLEAN_FATAL() << "Failed to read midr register from kernel"; } for (auto &cpuid : cpuids) { if (!cpuid) { CLEAN_FATAL() << "Unable to find CPU id for core " << &cpuid - &cpuids[0]; } } auto cpuid0 = cpuids[0]; bool single_uarch = true; for (auto &cpuid : cpuids) { if (cpuid != cpuid0) { single_uarch = false; break; } } if (single_uarch) { return { compute_cpu_microarch(cpuid0) }; } std::vector uarchs; for (auto &cpuid : cpuids) { uarchs.push_back(compute_cpu_microarch(cpuid)); } return uarchs; } static void arch_check_restricted_counter() { if (!Flags::get().suppress_environment_warnings) { fprintf(stderr, "Your CPU supports only one performance counter.\n" "Use of LL/SC instructions will not be detected and will\n" "cause silently corrupt recordings. It is highly recommended\n" "that you alter your configuration to enable additional performance\n" "counters.\n"); } } static bool always_recreate_counters(__attribute__((unused)) const perf_event_attrs &perf_attr) { return false; } static void check_for_arch_bugs(__attribute__((unused)) perf_event_attrs &perf_attr) {} static void post_init_pmu_uarchs(std::vector &pmu_uarchs) { std::map pmu_types; size_t npmus = pmu_uarchs.size(); int pmu_type_failed = 0; auto fallback_pmu = [] (PmuConfig &pmu_uarch) { pmu_uarch.pmu_name = nullptr; if (pmu_uarch.cycle_type != PERF_TYPE_HARDWARE) { pmu_uarch.cycle_type = PERF_TYPE_HARDWARE; pmu_uarch.cycle_event = PERF_COUNT_HW_CPU_CYCLES; } if (pmu_uarch.event_type != PERF_TYPE_RAW) { pmu_uarch.event_type = PERF_TYPE_RAW; } }; auto set_pmu_type = [] (PmuConfig &pmu_uarch, int type) { if (pmu_uarch.cycle_type != PERF_TYPE_HARDWARE) { pmu_uarch.cycle_type = type; } if (pmu_uarch.event_type != PERF_TYPE_RAW) { pmu_uarch.event_type = type; } }; bool has_unknown = false; for (size_t i = 0; i < npmus; i++) { auto &pmu_uarch = pmu_uarchs[i]; if (!(pmu_uarch.flags & (PMU_TICKS_RCB | PMU_TICKS_TAKEN_BRANCHES))) { has_unknown = true; continue; } if (!pmu_uarch.pmu_name) { CLEAN_FATAL() << "Unknown PMU name for core " << i; continue; } std::string pmu_name(pmu_uarch.pmu_name); auto &pmu_type = pmu_types[pmu_name]; if (pmu_type == -1) { fallback_pmu(pmu_uarch); continue; } if (pmu_type) { set_pmu_type(pmu_uarch, pmu_type); continue; } auto filename = "/sys/bus/event_source/devices/" + pmu_name + "/type"; std::ifstream file(filename); int val = 0; bool failed = false; if (!file) { failed = true; LOG(warn) << "Cannot open " << filename; } else { file >> val; if (!file) { failed = true; LOG(warn) << "Cannot read " << filename; } } if (failed) { // Record the failure and fallback to the kernel raw and hardware events instead pmu_type_failed++; fallback_pmu(pmu_uarch); pmu_type = -1; } else { set_pmu_type(pmu_uarch, val); pmu_type = val; } } if (pmu_types.size() == 1 && !has_unknown) { bool single_type = true; auto &pmu_uarch0 = pmu_uarchs[0]; // Apparently the same PMU type doesn't actually mean the same PMU events... 
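The sysfs lookup performed in post_init_pmu_uarchs() above can be sketched standalone: each PMU advertises its dynamic perf_event type id in /sys/bus/event_source/devices/<pmu>/type, and that id goes into perf_event_attr.type when the PMU is not a generic hardware one. The PMU name in `main` is only an example and varies per machine:

```
// Hedged standalone sketch of reading a PMU's perf event type id.
#include <fstream>
#include <iostream>
#include <string>

static int read_pmu_type(const std::string& pmu_name) {
  std::ifstream file("/sys/bus/event_source/devices/" + pmu_name + "/type");
  int type = -1;
  if (!(file >> type)) {
    return -1;  // caller would fall back to PERF_TYPE_HARDWARE/PERF_TYPE_RAW
  }
  return type;
}

int main() {
  // "armv8_pmuv3_0" is just an illustrative name; real names come from
  // PmuConfig::pmu_name.
  std::cout << "pmu type: " << read_pmu_type("armv8_pmuv3_0") << "\n";
}
```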
for (auto &pmu_uarch: pmu_uarchs) { if (&pmu_uarch == &pmu_uarch0) { // Skip first continue; } if (pmu_uarch.rcb_cntr_event != pmu_uarch0.rcb_cntr_event || pmu_uarch.minus_ticks_cntr_event != pmu_uarch0.minus_ticks_cntr_event || pmu_uarch.llsc_cntr_event != pmu_uarch0.llsc_cntr_event) { single_type = false; break; } } if (single_type) { // Single PMU type pmu_uarchs.resize(1); } } if (pmu_uarchs.size() != 1 && pmu_type_failed) { // If reading PMU type failed, we only allow a single PMU type to be sure // that we get what we want from the kernel events. CLEAN_FATAL() << "Unable to read PMU event types"; } } template <> void PerfCounters::reset_arch_extras() { // LL/SC can't be recorded reliably. Start a counter to detect // any usage, such that we can give an intelligent error message. struct perf_event_attr attr = perf_attrs[pmu_index].llsc_fail; attr.sample_period = 0; fd_strex_counter = start_counter(tid, fd_ticks_interrupt, &attr); } rr-5.7.0/src/PerfCounters_x86.h000066400000000000000000000346731450675474200162650ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ // This file is included from PerfCounters.cc static bool has_kvm_in_txcp_bug; static bool has_xen_pmi_bug; static bool supports_txcp; /** * Return the detected, known microarchitecture of this CPU, or don't * return; i.e. never return UnknownCpu. */ static CpuMicroarch compute_cpu_microarch() { auto cpuid_vendor = cpuid(CPUID_GETVENDORSTRING, 0); char vendor[12]; memcpy(&vendor[0], &cpuid_vendor.ebx, 4); memcpy(&vendor[4], &cpuid_vendor.edx, 4); memcpy(&vendor[8], &cpuid_vendor.ecx, 4); if (strncmp(vendor, "GenuineIntel", sizeof(vendor)) && strncmp(vendor, "AuthenticAMD", sizeof(vendor))) { CLEAN_FATAL() << "Unknown CPU vendor '" << vendor << "'"; } auto cpuid_data = cpuid(CPUID_GETFEATURES, 0); unsigned int cpu_type = cpuid_data.eax & 0xF0FF0; unsigned int ext_family = (cpuid_data.eax >> 20) & 0xff; switch (cpu_type) { case 0x006F0: case 0x10660: return IntelMerom; case 0x10670: case 0x106D0: return IntelPenryn; case 0x106A0: case 0x106E0: case 0x206E0: return IntelNehalem; case 0x20650: case 0x206C0: case 0x206F0: return IntelWestmere; case 0x206A0: case 0x206D0: case 0x306e0: return IntelSandyBridge; case 0x306A0: return IntelIvyBridge; case 0x306C0: /* Devil's Canyon */ case 0x306F0: case 0x40650: case 0x40660: return IntelHaswell; case 0x306D0: case 0x40670: case 0x406F0: case 0x50660: return IntelBroadwell; case 0x406e0: case 0x50650: case 0x506e0: return IntelSkylake; case 0x30670: case 0x406c0: case 0x50670: return IntelSilvermont; case 0x506f0: case 0x706a0: case 0x506c0: case 0x906c0: return IntelGoldmont; case 0x706e0: case 0x606a0: return IntelIcelake; case 0x806c0: case 0x806d0: return IntelTigerlake; case 0x806e0: case 0x906e0: return IntelKabylake; case 0xa0650: case 0xa0660: return IntelCometlake; case 0xa0670: return IntelRocketlake; case 0x90670: case 0x906a0: return IntelAlderlake; case 0xb0670: return IntelRaptorlake; case 0x806f0: return IntelSapphireRapid; case 0x30f00: return AMDF15R30; case 0x00f10: // Naples, Whitehaven, Summit Ridge, Snowy Owl (Zen), Milan (Zen 3) (UNTESTED) case 0x10f10: // Raven Ridge, Great Horned Owl (Zen) (UNTESTED) case 0x10f80: // Banded Kestrel (Zen), Picasso (Zen+) (UNTESTED) case 0x20f00: // Dali (Zen) (UNTESTED) case 0x00f80: // Colfax, Pinnacle Ridge (Zen+) (UNTESTED) case 0x30f10: // Rome, Castle Peak (Zen 2) case 0x60f00: // Renoir (Zen 2) (UNTESTED) case 0x70f10: // Matisse (Zen 2) (UNTESTED) case 0x60f80: // 
Lucienne case 0x90f00: // Van Gogh (Zen 2) if (ext_family == 8 || ext_family == 0xa) { return AMDZen; } else if (ext_family == 3) { return AMDF15R30; } break; case 0x20f10: // Vermeer (Zen 3) case 0x50f00: // Cezanne (Zen 3) case 0x40f40: // Rembrandt (Zen 3+) case 0x60f10: // Raphael (Zen 4) if (ext_family == 0xa) { return AMDZen; } default: break; } if (!strncmp(vendor, "AuthenticAMD", sizeof(vendor))) { CLEAN_FATAL() << "AMD CPU type " << HEX(cpu_type) << " (ext family " << HEX(ext_family) << ") unknown"; } else { CLEAN_FATAL() << "Intel CPU type " << HEX(cpu_type) << " unknown"; } return UnknownCpu; // not reached } static std::vector compute_cpu_microarchs() { return { compute_cpu_microarch() }; } static void check_for_kvm_in_txcp_bug(const perf_event_attrs &perf_attr) { int64_t count = 0; struct perf_event_attr attr = perf_attr.ticks; attr.config |= IN_TXCP; attr.sample_period = 0; bool disabled_txcp; ScopedFd fd = start_counter(0, -1, &attr, &disabled_txcp); if (fd.is_open() && !disabled_txcp) { ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); do_branches(); count = read_counter(fd); } supports_txcp = count > 0; has_kvm_in_txcp_bug = supports_txcp && count < NUM_BRANCHES; LOG(debug) << "supports txcp=" << supports_txcp; LOG(debug) << "has_kvm_in_txcp_bug=" << has_kvm_in_txcp_bug << " count=" << count; } static void check_for_xen_pmi_bug(const perf_event_attrs &perf_attr) { int32_t count = -1; struct perf_event_attr attr = perf_attr.ticks; attr.sample_period = NUM_BRANCHES - 1; ScopedFd fd = start_counter(0, -1, &attr); if (fd.is_open()) { // Do NUM_BRANCHES conditional branches that can't be optimized out. // 'accumulator' is always odd and can't be zero uint32_t accumulator = uint32_t(rand()) * 2 + 1; int raw_fd = fd; asm volatile( #if defined(__x86_64__) "mov %[_SYS_ioctl], %%rax;" "mov %[raw_fd], %%edi;" "xor %%rdx, %%rdx;" "mov %[_PERF_EVENT_IOC_ENABLE], %%rsi;" "syscall;" "cmp $-4095, %%rax;" "jae 2f;" "mov %[_SYS_ioctl], %%rax;" "mov %[_PERF_EVENT_IOC_RESET], %%rsi;" "syscall;" // From this point on all conditional branches count! "cmp $-4095, %%rax;" "jae 2f;" // Reset the counter period to the desired value. "mov %[_SYS_ioctl], %%rax;" "mov %[_PERF_EVENT_IOC_PERIOD], %%rsi;" "mov %[period], %%rdx;" "syscall;" "cmp $-4095, %%rax;" "jae 2f;" "mov %[_iterations], %%rax;" "1: dec %%rax;" // Multiply by 7. "mov %[accumulator], %%edx;" "shl $3, %[accumulator];" "sub %%edx, %[accumulator];" // Add 2. "add $2, %[accumulator];" // Mask off bits. "and $0xffffff, %[accumulator];" // And loop. "test %%rax, %%rax;" "jnz 1b;" "mov %[_PERF_EVENT_IOC_DISABLE], %%rsi;" "mov %[_SYS_ioctl], %%rax;" "xor %%rdx, %%rdx;" // We didn't touch rdi. "syscall;" "cmp $-4095, %%rax;" "jae 2f;" "movl $0, %[count];" "2: nop;" #elif defined(__i386__) "mov %[_SYS_ioctl], %%eax;" "mov %[raw_fd], %%ebx;" "xor %%edx, %%edx;" "mov %[_PERF_EVENT_IOC_ENABLE], %%ecx;" "int $0x80;" "cmp $-4095, %%eax;" "jae 2f;" "mov %[_SYS_ioctl], %%eax;" "mov %[_PERF_EVENT_IOC_RESET], %%ecx;" "int $0x80;" // From this point on all conditional branches count! "cmp $-4095, %%eax;" "jae 2f;" // Reset the counter period to the desired value. "mov %[_SYS_ioctl], %%eax;" "mov %[_PERF_EVENT_IOC_PERIOD], %%ecx;" "mov %[period], %%edx;" "int $0x80;" "cmp $-4095, %%eax;" "jae 2f;" "mov %[_iterations], %%eax;" "1: dec %%eax;" // Multiply by 7. "mov %[accumulator], %%edx;" "shll $3, %[accumulator];" "sub %%edx, %[accumulator];" // Add 2. "addl $2, %[accumulator];" // Mask off bits. 
"andl $0xffffff, %[accumulator];" // And loop. "test %%eax, %%eax;" "jnz 1b;" "mov %[_PERF_EVENT_IOC_DISABLE], %%ecx;" "mov %[_SYS_ioctl], %%eax;" "xor %%edx, %%edx;" // We didn't touch rdi. "int $0x80;" "cmp $-4095, %%eax;" "jae 2f;" "movl $0, %[count];" "2: nop;" #else #error unknown CPU architecture #endif : [accumulator] "+rm"(accumulator), [count] "=rm"(count) : [_SYS_ioctl] "i"(SYS_ioctl), [_PERF_EVENT_IOC_DISABLE] "i"(PERF_EVENT_IOC_DISABLE), [_PERF_EVENT_IOC_ENABLE] "i"(PERF_EVENT_IOC_ENABLE), [_PERF_EVENT_IOC_PERIOD] "i"(PERF_EVENT_IOC_PERIOD), [_PERF_EVENT_IOC_RESET] "i"(PERF_EVENT_IOC_RESET), // The check for the failure of some of our ioctls is in // the measured region, so account for that when looping. [_iterations] "i"(NUM_BRANCHES - 2), [period] "rm"(&attr.sample_period), [raw_fd] "rm"(raw_fd) : #if defined(__x86_64__) "rax", "rdx", "rdi", "rsi" // `syscall` clobbers rcx and r11. , "rcx", "r11" #elif defined(__i386__) "eax", "ebx", "ecx", "edx" #else #error unknown CPU architecture #endif ); // If things worked above, `count` should have been set to 0. if (count == 0) { count = read_counter(fd); } // Use 'accumulator' so it can't be optimized out. accumulator_sink = accumulator; } has_xen_pmi_bug = count > NUM_BRANCHES || count == -1; if (has_xen_pmi_bug) { LOG(debug) << "has_xen_pmi_bug=" << has_xen_pmi_bug << " count=" << count; if (!Flags::get().force_things) { FATAL() << "Overcount triggered by PMU interrupts detected due to Xen PMU " "virtualization bug.\n" "Aborting. Retry with -F to override, but it will probably\n" "fail."; } } } static void check_for_zen_speclockmap() { // When the SpecLockMap optimization is not disabled, rr will not work // reliably (e.g. it would work fine on a single process with a single // thread, but not more). When the optimization is disabled, the // perf counter for retired lock instructions of type SpecLockMapCommit // (on PMC 0x25) stays at 0. // See more details at https://github.com/rr-debugger/rr/issues/2034. struct perf_event_attr attr; // 0x25 == RETIRED_LOCK_INSTRUCTIONS - Counts the number of retired locked instructions // + 0x08 == SPECLOCKMAPCOMMIT init_perf_event_attr(&attr, PERF_TYPE_RAW, 0x510825); ScopedFd fd = start_counter(0, -1, &attr); if (fd.is_open()) { int atomic = 0; int64_t count = read_counter(fd); // A lock add is known to increase the perf counter we're looking at. asm volatile("lock addl $1, %0": "+m" (atomic)); if (read_counter(fd) == count) { LOG(debug) << "SpecLockMap is disabled"; } else { LOG(debug) << "SpecLockMap is not disabled"; fprintf(stderr, "On Zen CPUs, rr will not work reliably unless you disable the " "hardware SpecLockMap optimization.\nFor instructions on how to " "do this, see https://github.com/rr-debugger/rr/wiki/Zen\n"); } } } static void check_for_freeze_on_smi() { ScopedFd fd = ScopedFd("/sys/devices/cpu/freeze_on_smi", O_RDONLY); if (!fd.is_open()) { LOG(debug) << "/sys/devices/cpu/freeze_on_smi not present"; return; } char freeze_on_smi = 0; ssize_t ret = read(fd, &freeze_on_smi, 1); if (ret != 1) { FATAL() << "Can't read freeze_on_smi"; } if (freeze_on_smi == 0) { LOG(warn) << "Failed to read freeze_on_smi"; } else if (freeze_on_smi == '1') { LOG(debug) << "freeze_on_smi is set"; } else if (freeze_on_smi == '0') { LOG(warn) << "freeze_on_smi is not set"; if (!Flags::get().suppress_environment_warnings) { fprintf(stderr, "Freezing performance counters on SMIs should be enabled for maximum rr\n" "reliability on Comet Lake and later CPUs. 
To manually enable this setting, run\n" "\techo 1 | sudo tee /sys/devices/cpu/freeze_on_smi\n" "On systemd systems, consider putting\n" "'w /sys/devices/cpu/freeze_on_smi - - - - 1' into /etc/tmpfiles.d/10-rr.conf\n" "to automatically apply this setting on every reboot.\n" "See 'man 5 sysfs', 'man 5 tmpfiles.d'.\n" "If you are seeing this message, the setting has not been enabled.\n"); } } else { LOG(warn) << "Unrecognized freeze_on_smi value " << freeze_on_smi; } } static void check_for_arch_bugs(perf_event_attrs &perf_attr) { DEBUG_ASSERT(rr::perf_attrs.size() == 1); CpuMicroarch uarch = (CpuMicroarch)perf_attr.bug_flags; if (uarch >= FirstIntel && uarch <= LastIntel) { check_for_kvm_in_txcp_bug(perf_attr); check_for_xen_pmi_bug(perf_attr); } if (uarch >= IntelCometlake && uarch <= LastIntel) { check_for_freeze_on_smi(); } if (uarch == AMDZen) { check_for_zen_speclockmap(); } } static void post_init_pmu_uarchs(std::vector &pmu_uarchs) { if (pmu_uarchs.size() != 1) { CLEAN_FATAL() << "rr only support a single PMU on x86, " << pmu_uarchs.size() << " specified."; } } static bool always_recreate_counters(const perf_event_attrs &perf_attr) { // When we have the KVM IN_TXCP bug, reenabling the TXCP counter after // disabling it does not work. DEBUG_ASSERT(perf_attr.checked); return perf_attr.has_ioc_period_bug || has_kvm_in_txcp_bug; } static void arch_check_restricted_counter() { if ((cpuid(CPUID_GETEXTENDEDFEATURES, 0).ebx & HLE_FEATURE_FLAG) && !Flags::get().suppress_environment_warnings) { fprintf(stderr, "Your CPU supports Hardware Lock Elision but you only have one\n" "hardware performance counter available. Record and replay\n" "of code that uses HLE will fail unless you alter your\n" "configuration to make more than one hardware performance counter\n" "available.\n"); } } template void PerfCounters::reset_arch_extras() { DEBUG_ASSERT(rr::perf_attrs.size() == 1); if (supports_txcp) { struct perf_event_attr attr = rr::perf_attrs[0].ticks; if (has_kvm_in_txcp_bug) { // IN_TXCP isn't going to work reliably. Assume that HLE/RTM are not // used, // and check that. attr.sample_period = 0; attr.config |= IN_TX; fd_ticks_in_transaction = start_counter(tid, fd_ticks_interrupt, &attr); } else { // Set up a separate counter for measuring ticks, which does not have // a sample period and does not count events during aborted // transactions. // We have to use two separate counters here because the kernel does // not support setting a sample_period with IN_TXCP, apparently for // reasons related to this Intel note on IA32_PERFEVTSEL2: // ``When IN_TXCP=1 & IN_TX=1 and in sampling, spurious PMI may // occur and transactions may continuously abort near overflow // conditions. Software should favor using IN_TXCP for counting over // sampling. 
If sampling, software should use large “sample-after“ // value after clearing the counter configured to use IN_TXCP and // also always reset the counter even when no overflow condition // was reported.'' attr.sample_period = 0; attr.config |= IN_TXCP; fd_ticks_measure = start_counter(tid, fd_ticks_interrupt, &attr); } } } rr-5.7.0/src/PidFdMonitor.cc000066400000000000000000000011101450675474200156110ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "PidFdMonitor.h" #include "Session.h" namespace rr { /* static */ PidFdMonitor* PidFdMonitor::get(FdTable* fd_table, int fd) { FileMonitor* monitor = fd_table->get_monitor(fd); if (!monitor) { return NULL; } if (monitor->type() == PidFd) { return static_cast(monitor); } return NULL; } FdTable::shr_ptr PidFdMonitor::fd_table(Session& session) const { Task* t = session.find_task(tuid); if (!t) { return NULL; } return t->fd_table(); } } rr-5.7.0/src/PidFdMonitor.h000066400000000000000000000013241450675474200154620ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PID_FD_MONITOR_H_ #define RR_PID_FD_MONITOR_H_ #include "FdTable.h" #include "FileMonitor.h" #include "TaskishUid.h" struct perf_event_attr; namespace rr { class Session; /** * A FileMonitor to handle pidfd fds */ class PidFdMonitor : public FileMonitor { public: PidFdMonitor(TaskUid tuid) : tuid(tuid) {} virtual Type type() override { return PidFd; } static PidFdMonitor* get(FdTable* fd_table, int fd); FdTable::shr_ptr fd_table(Session& session) const; private: // 0 if this doesn't object doesn't refer to a tracee's proc-mem. TaskUid tuid; }; } // namespace rr #endif /* RR_PID_FD_MONITOR_H_ */ rr-5.7.0/src/PreserveFileMonitor.h000066400000000000000000000015521450675474200170720ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PRESERVE_FILE_MONITOR_H_ #define RR_PRESERVE_FILE_MONITOR_H_ #include "FileMonitor.h" namespace rr { /** * A FileMonitor that does no monitoring of I/O itself, but prevents the file * descriptor from being closed (except via privileged syscalls made by * preload.c) or seen in /proc/pid/fd/. * * The mere existence of this monitor disables syscall buffering for the fd, so * we get syscall traps for close() etc on the fd. Then * rec_prepare_syscall_arch calls allow_close() to check whether closing is * allowed. */ class PreserveFileMonitor : public FileMonitor { public: PreserveFileMonitor() {} virtual Type type() override { return Preserve; } virtual bool is_rr_fd() override { return true; } }; } // namespace rr #endif /* RR_PRESERVE_FILE_MONITOR_H_ */ rr-5.7.0/src/ProcFdDirMonitor.cc000066400000000000000000000070041450675474200164470ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "ProcFdDirMonitor.h" #include #include #include "AutoRemoteSyscalls.h" #include "RecordSession.h" #include "RecordTask.h" #include "log.h" using namespace std; namespace rr { ProcFdDirMonitor::ProcFdDirMonitor(Task* t, const string& pathname) { // XXX this makes some assumptions about namespaces... 
Probably fails // if |t| is not in the same pid namespace as rr int tid = parse_tid_from_proc_path(pathname, "/fd"); if (tid > 0) { Task* target = t->session().find_task(tid); if (target) { tuid = target->tuid(); } } } // returns the number of valid dirent structs left in the buffer template <typename D> static int filter_dirent_structs(RecordTask* t, uint8_t* buf, size_t size) { int bytes = 0; size_t current_offset = 0; while (1) { if (current_offset == size) { break; } D* current_struct = reinterpret_cast<D*>(buf + current_offset); auto next_off = current_offset + current_struct->d_reclen; char* fname = (char*)current_struct->d_name; char* end; int fd = strtol(fname, &end, 10); if (!*end && t->fd_table()->is_rr_fd(fd)) { // Skip this entry. memmove(current_struct, buf + next_off, size - next_off); size -= (next_off - current_offset); next_off = current_offset; } else { // Either this is a tracee fd or not an fd at all (e.g. '.') bytes += current_struct->d_reclen; } current_offset = next_off; } return bytes; } template <typename Arch> static void filter_dirents_arch(RecordTask* t) { auto regs = t->regs(); remote_ptr<uint8_t> ptr(regs.arg2()); size_t len = regs.arg3(); if (regs.syscall_failed() || !regs.syscall_result()) { return; } while (1) { vector<uint8_t> buf = t->read_mem(ptr, len); int bytes = regs.syscall_result(); if (regs.original_syscallno() == Arch::getdents64) { bytes = filter_dirent_structs<typename Arch::dirent64>(t, buf.data(), bytes); } else { bytes = filter_dirent_structs<typename Arch::dirent>(t, buf.data(), bytes); } if (bytes > 0) { t->write_mem(ptr, buf.data(), bytes); regs.set_syscall_result(bytes); t->set_regs(regs); // Explicitly record what the kernel may have touched and we discarded, // because it's userspace modification that will not be caught otherwise. if (len > (size_t)bytes) { t->record_remote(ptr + bytes, len - bytes); } return; } // We filtered out all the entries, so we need to repeat the syscall. { AutoRemoteSyscalls remote(t); remote.syscall(regs.original_syscallno(), regs.orig_arg1(), regs.arg2(), regs.arg3()); // Only copy over the syscall result. In particular, we don't want to // copy the AutoRemoteSyscalls ip(). regs.set_syscall_result(t->regs().syscall_result()); } if (regs.syscall_failed() || regs.syscall_result() == 0) { // Save the new syscall result, and record the buffer we will otherwise // ignore. t->record_remote(ptr, len); t->set_regs(regs); return; } } } static void filter_dirents(RecordTask* t) { RR_ARCH_FUNCTION(filter_dirents_arch, t->arch(), t); } void ProcFdDirMonitor::filter_getdents(RecordTask* t) { ASSERT(t, !t->session().is_replaying()); auto* target = static_cast<RecordTask*>(t->session().find_task(tuid)); if (!target) { return; } filter_dirents(t); } } // namespace rr
rr-5.7.0/src/ProcFdDirMonitor.h000066400000000000000000000013441450675474200163120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PROC_FD_DIR_MONITOR_H_ #define RR_PROC_FD_DIR_MONITOR_H_ #include "FileMonitor.h" #include "TaskishUid.h" namespace rr { /** * A FileMonitor to intercept enumerations of /proc/<pid>/fd so that entries * for rr's private fds can be hidden when <pid> is a tracee. */ class ProcFdDirMonitor : public FileMonitor { public: ProcFdDirMonitor(Task* t, const std::string& pathname); virtual Type type() override { return ProcFd; } virtual void filter_getdents(RecordTask* t) override; private: // 0 if this object doesn't refer to a tracee's fd directory. TaskUid tuid; }; } // namespace rr #endif /* RR_PROC_FD_DIR_MONITOR_H_ */
rr-5.7.0/src/ProcMemMonitor.cc000066400000000000000000000035401450675474200161760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "ProcMemMonitor.h" #include #include "AutoRemoteSyscalls.h" #include "RecordSession.h" #include "ReplaySession.h" #include "ReplayTask.h" #include "log.h" using namespace std; namespace rr { ProcMemMonitor::ProcMemMonitor(Task* t, const string& pathname) { // XXX this makes some assumptions about namespaces... Probably fails // if |t| is not in the same pid namespace as rr int tid = parse_tid_from_proc_path(pathname, "/mem"); if (tid > 0) { Task* target = t->session().find_task(tid); if (target) { auid = target->vm()->uid(); } } } void ProcMemMonitor::did_write(Task* t, const std::vector<Range>& ranges, LazyOffset& lazy_offset) { if (ranges.empty()) { return; } int64_t offset = lazy_offset.retrieve(true); // In prior versions of rr, we recorded this directly into the trace. // If so, there's nothing to do here. if (t->session().is_replaying() && t->session().as_replay()->has_trace_quirk(TraceReader::ExplicitProcMem)) { return; } if (t->session().is_recording()) { // Nothing to do now (though we may have just recorded the offset) return; } auto* target = t->session().find_address_space(auid); if (!target) { return; } ReplayTask* task = static_cast<ReplayTask*>(target->first_running_task()); if (!task) { return; } for (auto& r : ranges) { auto bytes = t->read_mem(r.data.cast<uint8_t>(), r.length); remote_ptr<uint8_t> target_addr = offset; task->write_mem(target_addr, bytes.data(), r.length); target->maybe_update_breakpoints(task, target_addr, r.length); offset += r.length; } } bool ProcMemMonitor::target_is_vm(AddressSpace *vm) { return auid == vm->uid(); } } // namespace rr
rr-5.7.0/src/ProcMemMonitor.h000066400000000000000000000021361450675474200160400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PROC_MEM_MONITOR_H_ #define RR_PROC_MEM_MONITOR_H_ #include "FileMonitor.h" #include "TaskishUid.h" namespace rr { /** * A FileMonitor to track writes to /proc/<pid>/mem so they can be replayed * when <pid> is a replayed tracee. */ class ProcMemMonitor : public FileMonitor { public: ProcMemMonitor(Task* t, const std::string& pathname); virtual Type type() override { return ProcMem; } // We need to PREVENT_SWITCH, since the timing of the write is otherwise // unpredictable from our perspective. virtual Switchable will_write(Task*) override { return PREVENT_SWITCH; } virtual void did_write(Task* t, const std::vector<Range>& ranges, LazyOffset& lazy_offset) override; virtual enum syscallbuf_fd_classes get_syscallbuf_class() override { return FD_CLASS_PROC_MEM; } bool target_is_vm(AddressSpace *t); private: // 0 if this object doesn't refer to a tracee's proc-mem.
AddressSpaceUid auid; }; } // namespace rr #endif /* RR_PROC_MEM_MONITOR_H_ */ rr-5.7.0/src/ProcStatMonitor.cc000066400000000000000000000037161450675474200164000ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include #include #include #include "ProcStatMonitor.h" #include "RecordTask.h" #include "RecordSession.h" #include "Scheduler.h" #include "log.h" #include "util.h" using namespace std; namespace rr { // Skip any lines that contain CPUs not in our cpu mask static void filter_proc_stat(string& data, const cpu_set_t& active) { string::iterator pos = data.begin(); while (pos + 4 < data.end()) { const char *cur_data = &*pos; static char cpu_str[] = "cpu"; if (memcmp(cur_data, cpu_str, sizeof(cpu_str)-1) == 0 && isdigit(*(cur_data + 3))) { unsigned long cpu = strtoul((char*)cur_data + 3, NULL, 10); if (!CPU_ISSET(cpu, &active)) { pos = data.erase(pos, ++std::find(pos, data.end(), '\n')); continue; } } pos = ++std::find(pos, data.end(), '\n'); } } ProcStatMonitor::ProcStatMonitor(Task* t, const string&) { if (t->session().is_replaying()) return; // Grab all the data now and buffer it for later access. This matches what the // kernel does (except that it does the buffering on first access) and is // required to give userspace code a consistent view of the file. std::ifstream proc_stat("/proc/stat"); if (!proc_stat.is_open()) { FATAL() << "Failed to process /proc/stat"; } data = string( (std::istreambuf_iterator(proc_stat)), (std::istreambuf_iterator())); const cpu_set_t cpus = static_cast(t)->session().scheduler().pretend_affinity_mask(); filter_proc_stat(data, cpus); } bool ProcStatMonitor::emulate_read( RecordTask* t, const vector& ranges, LazyOffset& lazy_offset, uint64_t* result) { int64_t offset = lazy_offset.retrieve(false); *result = t->write_ranges(ranges, (uint8_t*)data.data() + offset, (offset > (ssize_t)data.size()) ? 0 : data.size() - offset); return true; } } // namespace rr rr-5.7.0/src/ProcStatMonitor.h000066400000000000000000000016351450675474200162400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PROC_STAT_MONITOR_H_ #define RR_PROC_STAT_MONITOR_H_ #include "FileMonitor.h" namespace rr { /** * A FileMonitor to intercept /proc/stat in order to pretend to the * tracee that it only has the CPUs that rr is willing to give it. 
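The filtering idea is straightforward to demonstrate outside rr. Below is a hedged standalone sketch, simplified relative to `filter_proc_stat` above (which edits the buffered string in place): per-cpu lines whose index is outside the affinity mask are dropped, everything else passes through.

```
// Standalone sketch of filtering "cpuN" lines in /proc/stat content.
#include <sched.h>
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

static std::string filter_stat(const std::string& in, const cpu_set_t& mask) {
  std::istringstream src(in);
  std::string out, line;
  while (std::getline(src, line)) {
    if (line.compare(0, 3, "cpu") == 0 && line.size() > 3 &&
        isdigit((unsigned char)line[3])) {
      unsigned long cpu = strtoul(line.c_str() + 3, nullptr, 10);
      if (!CPU_ISSET(cpu, &mask)) {
        continue;  // hide this CPU from the tracee
      }
    }
    out += line + "\n";  // aggregate "cpu" line and non-cpu lines pass through
  }
  return out;
}

int main() {
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(0, &mask);
  std::cout << filter_stat("cpu  1 2 3\ncpu0 1 2 3\ncpu1 4 5 6\n", mask);
  // Prints the aggregate "cpu" line and "cpu0", but not "cpu1".
}
```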
* This is necessary on top of the SysCpuMonitor, because some versions * of glibc have bugs that cause it to fail to parse the * /sys/devices/system/cpu/online format, causing them to fallback to /proc/stat */ class ProcStatMonitor : public FileMonitor { public: ProcStatMonitor(Task* t, const std::string& pathname); virtual Type type() override { return ProcStat; } bool emulate_read(RecordTask* t, const std::vector& ranges, LazyOffset&, uint64_t* result) override; private: std::string data; }; } // namespace rr #endif /* RR_PROC_STAT_MONITOR_H_ */ rr-5.7.0/src/PsCommand.cc000066400000000000000000000116001450675474200151410ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include "Command.h" #include "TraceStream.h" #include "TraceTaskEvent.h" #include "core.h" #include "main.h" using namespace std; namespace rr { class PsCommand : public Command { public: virtual int run(vector& args) override; protected: PsCommand(const char* name, const char* help) : Command(name, help) {} static PsCommand singleton; }; PsCommand PsCommand::singleton("ps", " rr ps []\n"); static void print_exec_cmd_line(const TraceTaskEvent& event, FILE* out) { bool first = true; for (auto& word : event.cmd_line()) { fprintf(out, "%s%s", first ? "" : " ", word.c_str()); first = false; } fprintf(out, "\n"); } static void update_tid_to_pid_map(map& tid_to_pid, const TraceTaskEvent& e) { if (e.type() == TraceTaskEvent::CLONE) { if (e.clone_flags() & CLONE_THREAD) { // thread clone. Record thread's pid. tid_to_pid[e.tid()] = tid_to_pid[e.parent_tid()]; } else { // Some kind of fork. This task is its own pid. tid_to_pid[e.tid()] = e.tid(); } } else if (e.type() == TraceTaskEvent::EXIT) { tid_to_pid.erase(e.tid()); } } static int count_tids_for_pid(const std::map tid_to_pid, pid_t pid) { int count = 0; for (auto& tp : tid_to_pid) { if (tp.second == pid) { ++count; } } return count; } static ssize_t find_cmd_line(pid_t pid, const vector& events, size_t current_event, const map current_tid_to_pid) { map tid_to_pid = current_tid_to_pid; for (size_t i = current_event; i < events.size(); ++i) { const TraceTaskEvent& e = events[i]; if (e.type() == TraceTaskEvent::EXEC && tid_to_pid[e.tid()] == pid) { return i; } if (e.type() == TraceTaskEvent::EXIT && tid_to_pid[e.tid()] == pid && count_tids_for_pid(tid_to_pid, pid) == 1) { return -1; } update_tid_to_pid_map(tid_to_pid, e); } return -1; } string find_exit_code(pid_t pid, const vector& events, size_t current_event, const map current_tid_to_pid) { map tid_to_pid = current_tid_to_pid; for (size_t i = current_event; i < events.size(); ++i) { const TraceTaskEvent& e = events[i]; if (e.type() == TraceTaskEvent::EXIT && tid_to_pid[e.tid()] == pid && count_tids_for_pid(tid_to_pid, pid) == 1) { WaitStatus status = e.exit_status(); if (status.type() == WaitStatus::EXIT) { return to_string(status.exit_code()); } DEBUG_ASSERT(status.type() == WaitStatus::FATAL_SIGNAL); return to_string(-status.fatal_sig()); } else if (e.type() == TraceTaskEvent::DETACH && tid_to_pid[e.tid()] == pid && count_tids_for_pid(tid_to_pid, pid) == 1) { return string("detach"); } update_tid_to_pid_map(tid_to_pid, e); } return string("none"); } static int ps(const string& trace_dir, FILE* out) { TraceReader trace(trace_dir); fprintf(out, "PID\tPPID\tEXIT\tCMD\n"); vector events; while (true) { TraceTaskEvent r = trace.read_task_event(); if (r.type() == TraceTaskEvent::NONE) { break; } events.push_back(r); } if (events.empty() || events[0].type() != 
TraceTaskEvent::EXEC) { fprintf(stderr, "Invalid trace\n"); return 1; } map tid_to_pid; pid_t initial_tid = events[0].tid(); tid_to_pid[initial_tid] = initial_tid; fprintf(out, "%d\t--\t%s\t", initial_tid, find_exit_code(initial_tid, events, 0, tid_to_pid).c_str()); print_exec_cmd_line(events[0], out); for (size_t i = 1; i < events.size(); ++i) { auto& e = events[i]; update_tid_to_pid_map(tid_to_pid, e); if (e.type() == TraceTaskEvent::CLONE && !(e.clone_flags() & CLONE_THREAD)) { pid_t pid = tid_to_pid[e.tid()]; fprintf(out, "%d", e.tid()); if (e.own_ns_tid() != e.tid()) { fprintf(out, " (%d)", e.own_ns_tid()); } fprintf(out, "\t%d\t%s\t", tid_to_pid[e.parent_tid()], find_exit_code(pid, events, i, tid_to_pid).c_str()); ssize_t cmd_line_index = find_cmd_line(pid, events, i, tid_to_pid); if (cmd_line_index < 0) { // The main thread exited. All other threads must too, so there // is no more opportunity for e's pid to exec. fprintf(out, "(forked without exec)\n"); } else { print_exec_cmd_line(events[cmd_line_index], out); } } } return 0; } int PsCommand::run(vector& args) { while (parse_global_option(args)) { } string trace_dir; if (!parse_optional_trace_dir(args, &trace_dir)) { print_help(stderr); return 1; } return ps(trace_dir, stdout); } } // namespace rr rr-5.7.0/src/RRPageMonitor.h000066400000000000000000000013451450675474200156170ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_RR_PAGE_MONITOR_H_ #define RR_RR_PAGE_MONITOR_H_ #include "FileMonitor.h" #include "TraceStream.h" namespace rr { /** * RRPageMonitor gets installed upon any open of the librrpage.so preload library. * If this file gets mmaped, rr will attempt to map it to coincide with the * required fixed location for the rr page. */ class RRPageMonitor : public FileMonitor { public: RRPageMonitor() : FileMonitor() {}; virtual Type type() override { return RRPage; } }; static_assert(TraceReader::SpecialLibRRpage != 0, "Remember to delete this if support for the quirk is ever dropped"); } // namespace rr #endif /* RR_RR_PAGE_MONITOR_H_ */ rr-5.7.0/src/RecordCommand.cc000066400000000000000000000713271450675474200160110ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "RecordCommand.h" #include #include #include #include #include #include #include "preload/preload_interface.h" #include "Flags.h" #include "RecordSession.h" #include "StringVectorToCharArray.h" #include "WaitManager.h" #include "WaitStatus.h" #include "core.h" #include "git_revision.h" #include "kernel_metadata.h" #include "log.h" #include "main.h" #include "util.h" using namespace std; namespace rr { RecordCommand RecordCommand::singleton( "record", " rr record [OPTION]... 
[exe-args]...\n" " -c, --num-cpu-ticks= maximum number of 'CPU ticks' (currently \n" " retired conditional branches) to allow a \n" " task to run before interrupting it\n" " --disable-avx-512 Masks out the CPUID bits for AVX512\n" " This can improve trace portability\n" " --disable-cpuid-features [,]\n" " Mask out CPUID EAX=1 feature bits\n" " : Bitmask of bits to clear from ECX\n" " : Bitmask of bits to clear from EDX\n" " --disable-cpuid-features-ext [,[,]]\n" " Mask out CPUID EAX=7,ECX=0 feature bits\n" " : Bitmask of bits to clear from EBX\n" " : Bitmask of bits to clear from ECX\n" " : Bitmask of bits to clear from EDX\n" " --disable-cpuid-features-xsave \n" " Mask out CPUID EAX=0xD,ECX=1 feature bits\n" " : Bitmask of bits to clear from EAX\n" " -h, --chaos randomize scheduling decisions to try to \n" " reproduce bugs\n" " -n, --no-syscall-buffer disable the syscall buffer preload \n" " library even if it would otherwise be used\n" " --no-file-cloning disable file cloning for mmapped files\n" " --no-read-cloning disable file-block cloning for syscallbuf\n" " reads\n" " --num-cores=N pretend to have N cores (rr will still\n" " only run on a single core). Overrides\n" " random setting from --chaos.\n" " -o, --output-trace-dir
=<trace_dir>
set the output trace directory.\n" " _RR_TRACE_DIR gets ignored.\n" " Directory name is given name, not the\n" " application name.\n" " -p --print-trace-dir= print trace directory followed by a newline\n" " to given file descriptor\n" " --syscall-buffer-sig= the signal used for communication with the\n" " syscall buffer. SIGPWR by default, unused\n" " if --no-syscall-buffer is passed\n" " -t, --continue-through-signal=\n" " Unhandled signals will be ignored\n" " instead of terminating the program. The\n" " signal will still be delivered for user\n" " handlers and debugging.\n" " -u, --cpu-unbound allow tracees to run on any virtual CPU.\n" " Default is to bind to a random CPU. This " "option\n" " can cause replay divergence: use with\n" " caution.\n" " --bind-to-cpu= Bind to a particular CPU\n" " instead of a randomly chosen one.\n" " -v, --env=NAME=VALUE value to add to the environment of the\n" " tracee. There can be any number of these.\n" " -w, --wait Wait for all child processes to exit, not\n" " just the initial process.\n" " --nested= Control behavior when run inside an outer\n" " rr recording. Default: exit with error\n" " --nested=ignore Directly start child process so it's part\n" " of the outer recording\n" " --nested=detach Start a separate recording session.\n" " Must not share memory with the outer.\n" " --nested=release Run the child without recording it.\n" " Must not share memory with the outer.\n" " --setuid-sudo If running under sudo, pretend to be the\n" " user that ran sudo rather than root. This\n" " allows recording setuid/setcap binaries.\n" " --trace-id Sets the trace id to the specified id.\n" " --copy-preload-src Copy preload sources to trace dir\n" " --stap-sdt Enables the use of SystemTap statically-\n" " defined tracepoints\n" " --asan Override heuristics and always enable ASAN\n" " compatibility.\n" " --tsan Override heuristics and always enable TSAN\n" " compatibility.\n"); struct RecordFlags { vector extra_env; /* Max counter value before the scheduler interrupts a tracee. */ Ticks max_ticks; /* Whenever |ignore_sig| is pending for a tracee, decline to * deliver it. */ int ignore_sig; /* Whenever |continue_through_sig| is delivered to a tracee, if there is no * user handler and the signal would terminate the program, just ignore it. */ int continue_through_sig; /* Whether to use syscall buffering optimization during recording. */ RecordSession::SyscallBuffering use_syscall_buffer; /* If nonzero, the desired syscall buffer size. Must be a multiple of the page * size. */ size_t syscall_buffer_size; /* CPUID features to disable */ DisableCPUIDFeatures disable_cpuid_features; int print_trace_dir; string output_trace_dir; /* Whether to use file-cloning optimization during recording. */ bool use_file_cloning; /* Whether to use read-cloning optimization during recording. */ bool use_read_cloning; /* Whether tracee processes in record and replay are allowed * to run on any logical CPU. */ BindCPU bind_cpu; /* True if we should context switch after every rr event */ bool always_switch; /* Whether to enable chaos mode in the scheduler */ bool chaos; /* Controls number of cores reported to recorded process. */ int num_cores; /* True if we should wait for all processes to exit before finishing * recording. 
*/ bool wait_for_all; /* Start child process directly if run under nested rr recording */ NestedBehavior nested; bool scarce_fds; bool setuid_sudo; unique_ptr trace_id; /* Copy preload sources to trace dir */ bool copy_preload_src; /* The signal to use for syscallbuf desched events */ int syscallbuf_desched_sig; /* True if we should load the audit library for SystemTap SDT support. */ bool stap_sdt; /* True if we should unmap the vdso */ bool unmap_vdso; /* True if we should always enable ASAN compatibility. */ bool asan; /* True if we should always enable TSAN compatibility. */ bool tsan; RecordFlags() : max_ticks(Scheduler::DEFAULT_MAX_TICKS), ignore_sig(0), continue_through_sig(0), use_syscall_buffer(RecordSession::ENABLE_SYSCALL_BUF), syscall_buffer_size(0), print_trace_dir(-1), output_trace_dir(""), use_file_cloning(true), use_read_cloning(true), bind_cpu(BIND_CPU), always_switch(false), chaos(false), num_cores(0), wait_for_all(false), nested(NESTED_ERROR), scarce_fds(false), setuid_sudo(false), copy_preload_src(false), syscallbuf_desched_sig(SYSCALLBUF_DEFAULT_DESCHED_SIGNAL), stap_sdt(false), unmap_vdso(false), asan(false), tsan(false) {} }; static void parse_signal_name(ParsedOption& opt) { if (opt.int_value != INT64_MIN) { return; } for (int i = 1; i < _NSIG; i++) { std::string signame = signal_name(i); if (signame == opt.value) { opt.int_value = i; return; } DEBUG_ASSERT(signame[0] == 'S' && signame[1] == 'I' && signame[2] == 'G'); if (signame.substr(3) == opt.value) { opt.int_value = i; return; } } } static vector parse_feature_bits(ParsedOption& opt) { vector ret; const char* p = opt.value.c_str(); while (*p) { char* endptr; unsigned long long v = strtoull(p, &endptr, 0); if (v > UINT32_MAX || (*endptr && *endptr != ',')) { return vector(); } ret.push_back(v); p = *endptr == ',' ? 
endptr + 1 : endptr; } return ret; } static bool parse_record_arg(vector& args, RecordFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 0, "no-read-cloning", NO_PARAMETER }, { 1, "no-file-cloning", NO_PARAMETER }, { 2, "syscall-buffer-size", HAS_PARAMETER }, { 3, "nested", HAS_PARAMETER }, { 4, "scarce-fds", NO_PARAMETER }, { 5, "setuid-sudo", NO_PARAMETER }, { 6, "bind-to-cpu", HAS_PARAMETER }, { 7, "disable-cpuid-features", HAS_PARAMETER }, { 8, "disable-cpuid-features-ext", HAS_PARAMETER }, { 9, "disable-cpuid-features-xsave", HAS_PARAMETER }, { 10, "num-cores", HAS_PARAMETER }, { 11, "trace-id", HAS_PARAMETER }, { 12, "copy-preload-src", NO_PARAMETER }, { 13, "syscall-buffer-sig", HAS_PARAMETER }, { 14, "stap-sdt", NO_PARAMETER }, { 15, "unmap-vdso", NO_PARAMETER }, { 16, "disable-avx-512", NO_PARAMETER }, { 17, "asan", NO_PARAMETER }, { 18, "tsan", NO_PARAMETER }, { 'c', "num-cpu-ticks", HAS_PARAMETER }, { 'h', "chaos", NO_PARAMETER }, { 'i', "ignore-signal", HAS_PARAMETER }, { 'n', "no-syscall-buffer", NO_PARAMETER }, { 'p', "print-trace-dir", HAS_PARAMETER }, { 'o', "output-trace-dir", HAS_PARAMETER }, { 's', "always-switch", NO_PARAMETER }, { 't', "continue-through-signal", HAS_PARAMETER }, { 'u', "cpu-unbound", NO_PARAMETER }, { 'v', "env", HAS_PARAMETER }, { 'w', "wait", NO_PARAMETER }}; ParsedOption opt; auto args_copy = args; if (!Command::parse_option(args_copy, options, &opt)) { return false; } switch (opt.short_name) { case 'c': if (!opt.verify_valid_int(1, Scheduler::MAX_MAX_TICKS)) { return false; } flags.max_ticks = opt.int_value; break; case 'h': LOG(info) << "Enabled chaos mode"; flags.chaos = true; break; case 'i': parse_signal_name(opt); if (!opt.verify_valid_int(1, _NSIG - 1)) { return false; } flags.ignore_sig = opt.int_value; break; case 'n': flags.use_syscall_buffer = RecordSession::DISABLE_SYSCALL_BUF; break; case 'p': if (!opt.verify_valid_int(0, INT32_MAX)) { return false; } flags.print_trace_dir = opt.int_value; break; case 'o': flags.output_trace_dir = opt.value; break; case 0: flags.use_read_cloning = false; break; case 1: flags.use_file_cloning = false; break; case 2: if (!opt.verify_valid_int(4, 1024 * 1024)) { return false; } flags.syscall_buffer_size = ceil_page_size(opt.int_value * 1024); break; case 3: if (opt.value == "default" || opt.value == "error") { flags.nested = NESTED_ERROR; } else if (opt.value == "ignore") { flags.nested = NESTED_IGNORE; } else if (opt.value == "detach") { flags.nested = NESTED_DETACH; } else if (opt.value == "release") { flags.nested = NESTED_RELEASE; } else { LOG(warn) << "Unknown nesting behavior `" << opt.value << "`"; flags.nested = NESTED_ERROR; } break; case 4: flags.scarce_fds = true; break; case 5: flags.setuid_sudo = true; break; case 6: if (!opt.verify_valid_int(0, INT32_MAX)) { return false; } flags.bind_cpu = BindCPU(opt.int_value); break; case 7: { vector bits = parse_feature_bits(opt); if (bits.empty() || bits.size() > 2) { return false; } flags.disable_cpuid_features.features_ecx = bits[0]; if (bits.size() > 1) { flags.disable_cpuid_features.features_edx = bits[1]; } break; } case 8: { vector bits = parse_feature_bits(opt); if (bits.empty() || bits.size() > 3) { return false; } flags.disable_cpuid_features.extended_features_ebx = bits[0]; if (bits.size() > 1) { flags.disable_cpuid_features.extended_features_ecx = bits[1]; if (bits.size() > 2) { flags.disable_cpuid_features.extended_features_edx = bits[2]; } } break; } case 9: { vector bits = 
parse_feature_bits(opt); if (bits.size() != 1) { return false; } flags.disable_cpuid_features.xsave_features_eax = bits[0]; break; } case 10: { if (!opt.verify_valid_int(1, 128)) { return false; } flags.num_cores = opt.int_value; break; } case 11: { const uint8_t SUM_GROUP_LENS[5] = { 8, 12, 16, 20, 32 }; /* Parse UUIDs from string form optionally with hyphens */ uint8_t digit = 0; // This counts only hex digits (i.e. not hyphens) uint8_t group = 0; uint8_t acc = 0; unique_ptr buf(new TraceUuid); auto it = opt.value.begin(); while (it < opt.value.end()) { auto c = *it; if (digit > SUM_GROUP_LENS[4]) { return false; } if (digit % 2 == 0) { // First digit of the byte. if ('0' <= c && c <= '9') { acc = c - '0'; } else if ('a' <= c && c <= 'f') { acc = c - 'a' + 10; } else if ('A' <= c && c <= 'F') { acc = c - 'A' + 10; } else if (c == '-') { // Group delimiter. if (SUM_GROUP_LENS[group] != digit) { return false; } ++group; ++it; continue; } else { return false; } } else { // Second digit of the byte. acc <<= 4; if ('0' <= c && c <= '9') { acc += c - '0'; } else if ('a' <= c && c <= 'f') { acc += c - 'a' + 10; } else if ('A' <= c && c <= 'F') { acc += c - 'A' + 10; } else { return false; } buf->bytes[digit / 2] = acc; } ++digit; ++it; } if (SUM_GROUP_LENS[4] != digit) { return false; } flags.trace_id.swap(buf); break; } case 12: flags.copy_preload_src = true; break; case 13: parse_signal_name(opt); if (!opt.verify_valid_int(1, _NSIG - 1)) { return false; } flags.syscallbuf_desched_sig = opt.int_value; break; case 14: flags.stap_sdt = true; break; case 15: flags.unmap_vdso = true; break; case 16: flags.disable_cpuid_features.extended_features_ebx |= 0xdc230000; flags.disable_cpuid_features.extended_features_ecx |= 0x00002c42; flags.disable_cpuid_features.extended_features_edx |= 0x0000000c; break; case 17: flags.asan = true; break; case 18: flags.tsan = true; break; case 's': flags.always_switch = true; break; case 't': parse_signal_name(opt); if (!opt.verify_valid_int(1, _NSIG - 1)) { return false; } flags.continue_through_sig = opt.int_value; break; case 'u': flags.bind_cpu = UNBOUND_CPU; break; case 'v': flags.extra_env.push_back(opt.value); break; case 'w': flags.wait_for_all = true; break; default: DEBUG_ASSERT(0 && "Unknown option"); } args = args_copy; return true; } static volatile double term_requested; static bool did_print_reassurance = false; static const double TRACEE_SIGTERM_RESPONSE_MAX_TIME = 5; static const double RR_SIGKILL_GRACE_TIME = 5; /** * A terminating signal was received. * * First we forward it to the tracee. Then if the tracee is still * running after TRACEE_SIGTERM_RESPONSE_MAX_TIME, we kill it with SIGKILL. * If a term request remains pending for more than one second, * then assume rr is wedged and abort(). * * Note that this is called in a signal handler and could also * be called off the main thread. */ static void handle_SIGTERM(__attribute__((unused)) int sig) { // Don't use LOG() here because we're in a signal handler. If we do anything // that could allocate, we could deadlock. if (term_requested > 0) { double now = monotonic_now_sec(); if (now - term_requested > 1 + TRACEE_SIGTERM_RESPONSE_MAX_TIME) { if (!did_print_reassurance) { static const char msg[] = "[rr] Tracee failed to exit within 1s after SIGKILL. 
Recording will forcibly terminate in 4s.\n"; did_print_reassurance = true; write_all(STDERR_FILENO, msg, sizeof(msg) - 1); } else if (now - term_requested > RR_SIGKILL_GRACE_TIME + TRACEE_SIGTERM_RESPONSE_MAX_TIME) { notifying_abort(); } } } else { term_requested = monotonic_now_sec(); } } /** * Something segfaulted - this is probably a bug in rr. Try to at least * give a stacktrace. */ static void handle_SIGSEGV(__attribute__((unused)) int sig) { static const char msg[] = "rr itself crashed (SIGSEGV). This shouldn't happen!\n"; write_all(STDERR_FILENO, msg, sizeof(msg) - 1); notifying_abort(); } static void install_signal_handlers(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = handle_SIGTERM; sigaction(SIGTERM, &sa, nullptr); sa.sa_handler = handle_SIGSEGV; sigaction(SIGSEGV, &sa, nullptr); sa.sa_handler = SIG_IGN; sigaction(SIGHUP, &sa, nullptr); sigaction(SIGINT, &sa, nullptr); sigaction(SIGABRT, &sa, nullptr); sigaction(SIGQUIT, &sa, nullptr); } static void setup_session_from_flags(RecordSession& session, const RecordFlags& flags) { session.scheduler().set_max_ticks(flags.max_ticks); session.scheduler().set_always_switch(flags.always_switch); session.set_enable_chaos(flags.chaos); if (flags.num_cores) { // Set the number of cores reported, possibly overriding the chaos mode // setting. session.set_num_cores(flags.num_cores); } session.set_use_read_cloning(flags.use_read_cloning); session.set_use_file_cloning(flags.use_file_cloning); session.set_ignore_sig(flags.ignore_sig); session.set_continue_through_sig(flags.continue_through_sig); session.set_wait_for_all(flags.wait_for_all); if (flags.syscall_buffer_size > 0) { session.set_syscall_buffer_size(flags.syscall_buffer_size); } if (flags.scarce_fds) { for (int i = 0; i < 950; ++i) { open("/dev/null", O_RDONLY); } } } static RecordSession* static_session; // This can be called during debugging to close the trace so it can be used // later. 
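The constraint noted in handle_SIGTERM() above (no LOG(), nothing that could allocate) is the usual async-signal-safety rule. A minimal sketch of a handler written in that style, independent of rr's actual handlers: only lock-free state and async-signal-safe calls such as write(2) are touched.

```
// Standalone sketch of an async-signal-safe SIGTERM handler.
#include <csignal>
#include <unistd.h>

static volatile sig_atomic_t term_count;

static void on_term(int) {
  term_count = term_count + 1;
  // write() is async-signal-safe; fprintf/malloc/LOG-style logging are not.
  static const char msg[] = "[demo] SIGTERM received\n";
  ssize_t ignored = write(STDERR_FILENO, msg, sizeof(msg) - 1);
  (void)ignored;
}

int main() {
  struct sigaction sa = {};
  sa.sa_handler = on_term;
  sigaction(SIGTERM, &sa, nullptr);
  raise(SIGTERM);
  return term_count == 1 ? 0 : 1;
}
```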
void force_close_record_session() { if (static_session) { static_session->close_trace_writer(TraceWriter::CLOSE_ERROR); } } static void copy_preload_sources_to_trace(const string& trace_dir) { string files_dir = trace_dir + "/files.rr"; mkdir(files_dir.c_str(), 0700); pid_t pid; string dest_path = files_dir + "/librrpreload.zip"; string src_path = resource_path() + "share/rr/src"; char zip[] = "zip"; char r[] = "-r"; char j[] = "-j"; char* argv[] = { zip, r, j, const_cast(dest_path.c_str()), const_cast(src_path.c_str()), NULL }; posix_spawn_file_actions_t actions; posix_spawn_file_actions_init(&actions); posix_spawn_file_actions_addopen(&actions, STDOUT_FILENO, "/dev/null", O_RDONLY, 0); posix_spawn_file_actions_addopen(&actions, STDERR_FILENO, "/dev/null", O_RDONLY, 0); int ret = posix_spawnp(&pid, argv[0], &actions, NULL, argv, environ); if (ret) { FATAL() << "Can't spawn 'zip'"; } posix_spawn_file_actions_destroy(&actions); WaitResult result = WaitManager::wait_exit(WaitOptions(pid)); if (result.code != WAIT_OK) { FATAL() << "Wait failed"; } LOG(info) << "Got zip status " << result.status; } static void save_rr_git_revision(const string& trace_dir) { string files_dir = trace_dir + "/files.rr"; mkdir(files_dir.c_str(), 0700); string dest_path = files_dir + "/rr_git_revision"; ScopedFd fd(dest_path.c_str(), O_CREAT | O_WRONLY, 0600); ssize_t written = write(fd, GIT_REVISION, sizeof(GIT_REVISION) - 1); if (written != sizeof(GIT_REVISION) - 1) { FATAL() << "Can't write GIT_REVISION"; } } static void* repeat_SIGTERM(__attribute__((unused)) void* p) { sleep_time(TRACEE_SIGTERM_RESPONSE_MAX_TIME); /* send another SIGTERM so we wake up and SIGKILL our tracees */ kill(getpid(), SIGTERM); sleep_time(RR_SIGKILL_GRACE_TIME); /* Ok, now we're really wedged, just repeatedly SIGTERM until we're out */ while (1) { kill(getpid(), SIGTERM); sleep_time(0.01); } } static WaitStatus record(const vector& args, const RecordFlags& flags) { LOG(info) << "Start recording..."; auto session = RecordSession::create( args, flags.extra_env, flags.disable_cpuid_features, flags.use_syscall_buffer, flags.syscallbuf_desched_sig, flags.bind_cpu, flags.output_trace_dir, flags.trace_id.get(), flags.stap_sdt, flags.unmap_vdso, flags.asan, flags.tsan); setup_session_from_flags(*session, flags); static_session = session.get(); if (flags.print_trace_dir >= 0) { const string& dir = session->trace_writer().dir(); write_all(flags.print_trace_dir, dir.c_str(), dir.size()); write_all(flags.print_trace_dir, "\n", 1); } if (flags.copy_preload_src) { const string& dir = session->trace_writer().dir(); copy_preload_sources_to_trace(dir); save_rr_git_revision(dir); } // Install signal handlers after creating the session, to ensure they're not // inherited by the tracee. 
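The `zip` spawn in copy_preload_sources_to_trace() above follows a common posix_spawn pattern: set up file actions to silence the child's output, spawn via PATH lookup, then reap it. A self-contained sketch of that pattern, with `true` as a stand-in command:

```
// Standalone sketch of spawning a helper with stdout redirected.
#include <fcntl.h>
#include <spawn.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

extern char** environ;

int main() {
  char cmd[] = "true";
  char* argv[] = { cmd, nullptr };
  posix_spawn_file_actions_t actions;
  posix_spawn_file_actions_init(&actions);
  // Replace the child's stdout with /dev/null.
  posix_spawn_file_actions_addopen(&actions, STDOUT_FILENO, "/dev/null",
                                   O_WRONLY, 0);
  pid_t pid;
  int ret = posix_spawnp(&pid, argv[0], &actions, nullptr, argv, environ);
  posix_spawn_file_actions_destroy(&actions);
  if (ret != 0) {
    perror("posix_spawnp");
    return 1;
  }
  int status = 0;
  waitpid(pid, &status, 0);
  return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
}
```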
install_signal_handlers(); RecordSession::RecordResult step_result; bool did_forward_SIGTERM = false; bool did_term_detached_tasks = false; pthread_t term_repeater_thread; do { bool done_initial_exec = session->done_initial_exec(); step_result = session->record_step(); // Only create latest-trace symlink if --output-trace-dir is not being used if (!done_initial_exec && session->done_initial_exec() && flags.output_trace_dir.empty()) { session->trace_writer().make_latest_trace(); } if (term_requested) { if (monotonic_now_sec() - term_requested > TRACEE_SIGTERM_RESPONSE_MAX_TIME) { /* time ran out for the tracee to respond to SIGTERM; kill everything */ session->terminate_tracees(); } else if (!did_forward_SIGTERM) { session->forward_SIGTERM(); // Start a thread to send a SIGTERM to ourselves (again) // in case the tracee doesn't respond to SIGTERM. pthread_create(&term_repeater_thread, NULL, repeat_SIGTERM, NULL); did_forward_SIGTERM = true; } /* Forward SIGTERM to detached tasks immediately */ if (!did_term_detached_tasks) { session->term_detached_tasks(); did_term_detached_tasks = true; } } } while (step_result.status == RecordSession::STEP_CONTINUE); session->close_trace_writer(TraceWriter::CLOSE_OK); static_session = nullptr; switch (step_result.status) { case RecordSession::STEP_CONTINUE: // SIGTERM interrupted us. return WaitStatus::for_fatal_sig(SIGTERM); case RecordSession::STEP_EXITED: return step_result.exit_status; case RecordSession::STEP_SPAWN_FAILED: cerr << "\n" << step_result.failure_message << "\n"; return WaitStatus::for_exit_code(EX_UNAVAILABLE); default: DEBUG_ASSERT(0 && "Unknown exit status"); return WaitStatus(); } } static void exec_child(vector& args) { execvp(args[0].c_str(), StringVectorToCharArray(args).get()); // That failed. Try executing the file directly. execv(args[0].c_str(), StringVectorToCharArray(args).get()); switch (errno) { case ENOENT: fprintf(stderr, "execv failed: '%s' (or interpreter) not found (%s)", args[0].c_str(), errno_name(errno).c_str()); break; default: fprintf(stderr, "execv of '%s' failed (%s)", args[0].c_str(), errno_name(errno).c_str()); break; } _exit(1); // Never returns! } static void reset_uid_sudo() { // Let's change our uids now. We do keep capabilities though, since that's // the point of the exercise. The first exec will reset both the keepcaps, // and the capabilities in the child std::string sudo_uid = getenv("SUDO_UID"); std::string sudo_gid = getenv("SUDO_GID"); DEBUG_ASSERT(!sudo_uid.empty() && !sudo_gid.empty()); uid_t tracee_uid = stoi(sudo_uid); gid_t tracee_gid = stoi(sudo_gid); // Setuid will drop effective capabilities. 
Save them now and set them // back after struct NativeArch::cap_header header = {.version = _LINUX_CAPABILITY_VERSION_3, .pid = 0 }; struct NativeArch::cap_data data[2]; if (syscall(NativeArch::capget, &header, data) != 0) { FATAL() << "FAILED to read capabilities"; } if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0)) { FATAL() << "FAILED to set keepcaps"; } if (setgid(tracee_gid) != 0) { FATAL() << "FAILED to setgid to sudo group"; } if (setuid(tracee_uid) != 0) { FATAL() << "FAILED to setuid to sudo user"; } if (syscall(NativeArch::capset, &header, data) != 0) { FATAL() << "FAILED to set capabilities"; } // Just make sure the ambient set is cleared, to avoid polluting the tracee prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0); } int RecordCommand::run(vector& args) { RecordFlags flags; while (parse_record_arg(args, flags)) { } if (running_under_rr()) { switch (flags.nested) { case NESTED_IGNORE: exec_child(args); return 1; case NESTED_DETACH: case NESTED_RELEASE: { int ret = syscall(SYS_rrcall_detach_teleport, (uintptr_t)0, (uintptr_t)0, (uintptr_t)0, (uintptr_t)0, (uintptr_t)0, (uintptr_t)0); if (ret < 0) { FATAL() << "Failed to detach from parent rr"; } if (running_under_rr(false)) { FATAL() << "Detaching from parent rr did not work"; } if (flags.nested == NESTED_RELEASE) { exec_child(args); return 1; } // running_under_rr() changed - respect the log specification from RR_LOG // just as if we hadn't been running under rr. apply_log_spec_from_env(); break; } default: fprintf(stderr, "rr: cannot run rr recording under rr. Exiting.\n" "Use `rr record --nested=ignore` to start the child " "process directly.\n"); return 1; } } if (!verify_not_option(args) || args.size() == 0) { print_help(stderr); return 1; } assert_prerequisites(flags.use_syscall_buffer); if (flags.setuid_sudo) { if (geteuid() != 0 || getenv("SUDO_UID") == NULL) { fprintf(stderr, "rr: --setuid-sudo option may only be used under sudo.\n" "Re-run as `sudo -EP --preserve-env=HOME rr record --setuid-sudo` to" "record privileged executables.\n"); return 1; } reset_uid_sudo(); } if (flags.chaos) { // Add up to one page worth of random padding to the environment to induce // a variety of possible stack pointer offsets vector chars; chars.resize(random() % page_size()); memset(chars.data(), '0', chars.size()); chars.push_back(0); string padding = string("RR_CHAOS_PADDING=") + chars.data(); flags.extra_env.push_back(padding); } WaitStatus status = record(args, flags); // Everything should have been cleaned up by now. 
  WaitStatus status = record(args, flags);

  // Everything should have been cleaned up by now.
  check_for_leaks();

  switch (status.type()) {
    case WaitStatus::EXIT:
      return status.exit_code();
    case WaitStatus::FATAL_SIGNAL:
      signal(status.fatal_sig(), SIG_DFL);
      prctl(PR_SET_DUMPABLE, 0);
      kill(getpid(), status.fatal_sig());
      break;
    default:
      FATAL() << "Don't know why we exited: " << status;
      break;
  }
  return 1;
}

} // namespace rr
rr-5.7.0/src/RecordCommand.h000066400000000000000000000010521450675474200156370ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_RECORD_COMMAND_H_
#define RR_RECORD_COMMAND_H_

#include "Command.h"

namespace rr {

void force_close_record_session();

class RecordCommand : public Command {
public:
  virtual int run(std::vector<std::string>& args) override;

  static RecordCommand* get() { return &singleton; }

protected:
  RecordCommand(const char* name, const char* help) : Command(name, help) {}

  static RecordCommand singleton;
};

} // namespace rr

#endif // RR_RECORD_COMMAND_H_
rr-5.7.0/src/RecordSession.cc000066400000000000000000003214421450675474200160520ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "RecordSession.h"

#include #include #include #include #include #include #include #include #include

#include "AutoRemoteSyscalls.h"
#include "ElfReader.h"
#include "Flags.h"
#include "RecordTask.h"
#include "TraceeAttentionSet.h"
#include "VirtualPerfCounterMonitor.h"
#include "WaitManager.h"
#include "core.h"
#include "ftrace.h"
#include "kernel_metadata.h"
#include "kernel_supplement.h"
#include "log.h"
#include "record_signal.h"
#include "record_syscall.h"
#include "seccomp-bpf.h"

namespace rr {

// Undef si_addr_lsb since it's an alias for a field name that doesn't exist,
// and we need to use the actual field name.
#ifdef si_addr_lsb
#undef si_addr_lsb
#endif

using namespace rr;
using namespace std;

template <typename T> static remote_ptr<T> mask_low_bit(remote_ptr<T> p) {
  return p.as_int() & ~uintptr_t(1);
}

template <typename Arch>
static void record_robust_futex_change(
    RecordTask* t, const typename Arch::robust_list_head& head,
    remote_ptr<void> base) {
  if (base.is_null()) {
    return;
  }
  remote_ptr<void> futex_void_ptr = base + head.futex_offset;
  auto futex_ptr = futex_void_ptr.cast<uint32_t>();
  // We can't just record the current futex value because at this point
  // in task exit the robust futex handling has not happened yet. So we have
  // to emulate what the kernel will do!
  bool ok = true;
  uint32_t val = t->read_mem(futex_ptr, &ok);
  if (!ok) {
    return;
  }
  if (pid_t(val & FUTEX_TID_MASK) != t->own_namespace_rec_tid) {
    return;
  }
  val = (val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
  // Update memory now so that the kernel doesn't decide to do it later, at
  // a time that might race with other tracee execution.
  t->write_mem(futex_ptr, val);
  t->record_local(futex_ptr, &val);
}
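// Worked example of the update above, using the constants from
// <linux/futex.h> (FUTEX_TID_MASK = 0x3fffffff, FUTEX_WAITERS = 0x80000000,
// FUTEX_OWNER_DIED = 0x40000000): if the dying task's tid is 0x1234 and the
// futex word holds 0x80001234 (owned by us, with waiters), the emulated
// kernel write is (0x80001234 & FUTEX_WAITERS) | FUTEX_OWNER_DIED, i.e.
// 0xc0000000.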
/**
 * Any user-space writes performed by robust futex handling are captured here.
 * They must be emulated during replay; the kernel will not do it for us
 * during replay because the TID value in each futex is the recorded
 * TID, not the actual TID of the dying task.
 */
template <typename Arch>
static void record_robust_futex_changes_arch(RecordTask* t) {
  if (t->did_record_robust_futex_changes) {
    return;
  }
  t->did_record_robust_futex_changes = true;

  auto head_ptr = t->robust_list().cast<typename Arch::robust_list_head>();
  if (head_ptr.is_null()) {
    return;
  }
  ASSERT(t, t->robust_list_len() == sizeof(typename Arch::robust_list_head));
  bool ok = true;
  auto head = t->read_mem(head_ptr, &ok);
  if (!ok) {
    return;
  }
  record_robust_futex_change<Arch>(t, head,
                                   mask_low_bit(head.list_op_pending.rptr()));
  for (auto current = mask_low_bit(head.list.next.rptr());
       current.as_int() != head_ptr.as_int();) {
    record_robust_futex_change<Arch>(t, head, current);
    auto next = t->read_mem(current, &ok);
    if (!ok) {
      return;
    }
    current = mask_low_bit(next.next.rptr());
  }
}

static void record_robust_futex_changes(RecordTask* t) {
  RR_ARCH_FUNCTION(record_robust_futex_changes_arch, t->arch(), t);
}

static void record_exit_trace_event(RecordTask* t, WaitStatus exit_status) {
  t->session().trace_writer().write_task_event(
      TraceTaskEvent::for_exit(t->tid, exit_status));
  if (t->thread_group()->tgid == t->tid) {
    t->thread_group()->exit_status = exit_status;
  }
}

static bool looks_like_syscall_entry(RecordTask* t) {
  bool ok;
  bool at_syscall = is_at_syscall_instruction(
      t, t->regs().ip().decrement_by_syscall_insn_length(t->arch()), &ok);
  // It's possible for the task to have died (e.g. if it got signaled twice
  // in rapid succession). In that case, try to just go by register contents.
  if (ok && !at_syscall) {
    return false;
  }
  if (is_x86ish(t->arch())) {
    // On x86 rax gets set to ENOSYS on entry. Elsewhere this does not happen.
    // Further, even if we did ask about the syscallno, it might have been
    // reset by the signal handler. However, on non-x86 platforms we currently
    // count taken branches, rather than only conditional ones, so it should
    // be impossible to see the same syscall ip twice without intervening
    // ticks, so the check that follows these conditions should be sufficient
    // there.
    return t->regs().original_syscallno() >= 0 &&
           t->regs().syscall_result_signed() == -ENOSYS;
  } else if (t->arch() == aarch64) {
    // We recorded when we saw the last syscall entry
    // so just use that to determine if we've already saved it in the trace.
    if (t->ticks_at_last_syscall_entry == t->tick_count() &&
        t->ip_at_last_syscall_entry == t->regs().ip()) {
      return !t->last_syscall_entry_recorded;
    }
  }
  // Getting a sched event here is better than a spurious syscall event.
  // Syscall entry does not cause visible register modification, so upon
  // hitting the sched event the register state would indeed match.
  return ok;
}

/**
 * Return true if we handle a ptrace exit event for task t. When this returns
 * true, t may have been deleted.
 */
static bool handle_ptrace_exit_event(RecordTask* t) {
  if (t->was_reaped()) {
    if (t->handled_ptrace_exit_event()) {
      t->did_reach_zombie();
      return true;
    }
  } else if (t->ptrace_event() != PTRACE_EVENT_EXIT) {
    return false;
  }

  if (t->stable_exit || t->was_reaped()) {
    LOG(debug) << "stable exit";
  } else {
    if (!t->may_be_blocked()) {
      // might have been hit by a SIGKILL or a SECCOMP_RET_KILL, in which case
      // there might be some execution since its last recorded event that we
      // need to replay.
      // There's a weird case (in 4.13.5-200.fc26.x86_64 at least) where the
      // task can enter the kernel but instead of receiving a syscall ptrace
      // event, we receive a PTRACE_EVENT_EXIT due to a concurrent execve
      // (and probably a concurrent SIGKILL could do the same). The task state
      // has been updated to reflect syscall entry. If we record a SCHED in
If we record a SCHED in // that state replay of the SCHED will fail. So detect that state and fix // it up. // If we got killed in an untraced syscall on AArch64, // it is difficult/impossible to tell if the value of x0 has been overwritten // with the syscall result/error number // and it's even harder to recover the correct value of x0. // Simply ignore these since we weren't going to record them anyway. if (looks_like_syscall_entry(t) && !t->is_in_untraced_syscall()) { // Either we're in a syscall, or we're immediately after a syscall // and it exited. if (t->ticks_at_last_recorded_syscall_exit == t->tick_count() && t->regs().ip() == t->ip_at_last_recorded_syscall_exit) { LOG(debug) << "Nothing to record after PTRACE_EVENT_EXIT"; // It's the latter case; do nothing. } else { // It's the former case ... probably. Theoretically we could have // re-executed a syscall without any ticks in between, but that seems // highly improbable. // Record the syscall-entry event that we otherwise failed to record. t->canonicalize_regs(t->arch()); auto r = t->regs(); if (t->arch() == aarch64) { // On AArch64, when we get here, there are 3 different cases, // 1. EXIT before we hit the syscall entry stop // 2. EXIT after syscall entry stop but // before the result (X0) is overwritten // 3. EXIT after syscall entry stop and // after the result (X0) is overwritten // (i.e. after the syscall but we got an EXIT // before the syscall exit stop.) // We detect the first case based on `*_at_last_syscall_entry` // set by `apply_syscall_entry_regs` and trust the current values // `x0` and `x8`. // For the second and third cases, we rely on the syscall enter stop // to set the orig_arg1 and original_syscallno correctly. if (t->ticks_at_last_syscall_entry == t->tick_count() && t->ip_at_last_syscall_entry == r.ip()) { // We need to rely on the saved `orig_arg1` since in the third case // the `x0` may already be overwritten. // The assertion here assumes that // `apply_syscall_entry_regs` is called when we enter the syscall // and `x8` still holds the correct syscall number // when we hit the process exit stop. ASSERT(t, r.original_syscallno() == r.syscallno()) << "syscallno not saved by syscall enter handler: " << r; r.set_arg1(r.orig_arg1()); } else { r.set_original_syscallno(r.syscallno()); } } // Assume it's a native-arch syscall. If it isn't, it doesn't matter // all that much since we aren't actually going to do anything with it // in this task. // Avoid calling detect_syscall_arch here since it could fail if the // task is already completely dead and gone. SyscallEvent event(r.original_syscallno(), t->arch()); event.state = ENTERING_SYSCALL; // Don't try to reset the syscallbuf here. The task may be exiting // while in arbitrary syscallbuf code. And of course, because it's // exiting, it doesn't matter if we don't reset the syscallbuf. t->record_event(event, RecordTask::FLUSH_SYSCALLBUF, RecordTask::DONT_RESET_SYSCALLBUF, &r); } } else { // Don't try to reset the syscallbuf here. The task may be exiting // while in arbitrary syscallbuf code. And of course, because it's // exiting, it doesn't matter if we don't reset the syscallbuf. // XXX flushing the syscallbuf may be risky too... auto event = Event::sched(); // When replaying this SCHED, we won't proceed past the `syscall_hook` // entry point. Code inside the syscallbuf may be in a bad state during // replay because we didn't save buffered syscalls. 
event.Sched().in_syscallbuf_syscall_hook = t->syscallbuf_code_layout.syscallbuf_syscall_hook; t->record_event(event, RecordTask::FLUSH_SYSCALLBUF, RecordTask::DONT_RESET_SYSCALLBUF); } } /* XXX: We could try to find some tasks here to unmap our buffers, but it * seems hardly worth it. * Mark buffers as gone after recording events, in case they need to flush the syscallbuf. */ t->destroy_buffers(nullptr, nullptr); } WaitStatus exit_status; if (t->was_reaped()) { exit_status = t->status(); } else { record_robust_futex_changes(t); unsigned long msg = 0; // If ptrace_if_stopped fails, then the task has been killed by SIGKILL // or equivalent. if (t->ptrace_if_stopped(PTRACE_GETEVENTMSG, nullptr, &msg)) { exit_status = WaitStatus(msg); } else { exit_status = WaitStatus::for_fatal_sig(SIGKILL); } } t->did_handle_ptrace_exit_event(); // If we died because of a coredumping signal, that is a barrier event, and // every task in the address space needs to pass its PTRACE_EXIT_EVENT before // they proceed to (potentially hidden) zombie state, so we can't wait for // that to happen. // Similarly we can't wait for this task to exit if there are other // tasks in its pid namespace that need to exit and this is the last thread // of pid-1 in that namespace, because the kernel must reap them before // letting this task complete its exit. bool may_wait_exit = !t->was_reaped() && !is_coredumping_signal(exit_status.fatal_sig()) && !t->waiting_for_pid_namespace_tasks_to_exit(); record_exit_trace_event(t, exit_status); t->record_exit_event( (!t->was_reaped() && !may_wait_exit) ? RecordTask::WRITE_CHILD_TID : RecordTask::KERNEL_WRITES_CHILD_TID); if (!t->was_reaped()) { t->proceed_to_exit(may_wait_exit); } t->do_ptrace_exit_stop(exit_status); if (may_wait_exit) { t->did_reach_zombie(); } else if (!t->was_reaped()) { t->waiting_for_reap = true; } return true; } static void note_entering_syscall(RecordTask* t) { ASSERT(t, EV_SYSCALL == t->ev().type()); t->ev().Syscall().state = ENTERING_SYSCALL; if (!t->ev().Syscall().is_restart) { /* Save a copy of the arg registers so that we * can use them to detect later restarted * syscalls, if this syscall ends up being * restarted. We have to save the registers * in this rather awkward place because we * need the original registers; the restart * (if it's not a SYS_restart_syscall restart) * will use the original registers. */ t->ev().Syscall().regs = t->regs(); } else { t->ev().Syscall().regs.set_syscallno(t->regs().syscallno()); // We may have intentionally stored the syscall result here. // Now that we're safely past the signal delivery, make the // registers look like they did at the original syscall entry // again. t->ev().Syscall().regs.set_arg1(t->ev().Syscall().regs.orig_arg1()); if (t->arch() == aarch64) { // We probably got here with a PTRACE_SYSCALL. The x7 // value will be wrong due to the aarch64 kernel bug. // Get it from the syscall event. 
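      // (Background, as we understand the bug referenced above: around
      // syscall ptrace stops the aarch64 kernel can clobber the reported x7,
      // so the value read back from the stopped task is untrustworthy; the
      // copy saved in the syscall event at entry is the authoritative one.)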
      Registers r = t->regs();
      r.set_x7(t->ev().Syscall().regs.x7());
      t->set_regs(r);
    }
  }
}

#if defined (__x86_64__)
static bool is_in_vsyscall(remote_code_ptr ip) {
  // This is hardcoded by the Linux ABI
  remote_code_ptr vsyscall_start = 0xffffffffff600000;
  remote_code_ptr vsyscall_end = 0xffffffffff601000;
  return vsyscall_start <= ip && ip < vsyscall_end;
}
#else
static bool is_in_vsyscall(remote_code_ptr) { return false; }
#endif

void RecordSession::handle_seccomp_traced_syscall(RecordTask* t,
                                                  StepState* step_state,
                                                  RecordResult* result,
                                                  bool* did_enter_syscall) {
  *did_enter_syscall = false;
  // Special case: If the tracee issues a vsyscall, we will get a seccomp
  // trap, but no syscall traps whatsoever. In particular, we wouldn't see it
  // during replay either. We try to monkeypatch the caller on the assumption
  // that known callers of this (deprecated) interface all follow a common
  // pattern. If we can't patch the caller, this is a fatal error, since the
  // recording will otherwise be broken.
  remote_code_ptr ip = t->regs().ip();
  if (is_in_vsyscall(ip)) {
    remote_ptr<void> sp = t->regs().sp();
    // The kernel assumes the return address is on the stack - we do the same
    remote_ptr<remote_code_ptr> ret_addr_addr = sp.cast<remote_code_ptr>();
    remote_code_ptr ret_addr = t->read_mem(ret_addr_addr);
    // Skip this syscall. We will attempt to patch it to the vdso entry and
    // let the tracee retry there.
    Registers regs = t->regs();
    regs.set_original_syscallno(-1);
    // We can't modify the ip here, the kernel will kill the tracee with
    // SIGSYS. Instead, we set a breakpoint at the return instruction.
    t->set_regs(regs);
    t->vm()->add_breakpoint(ret_addr, BKPT_INTERNAL);
    while (true) {
      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT,
                               RESUME_NO_TICKS)) {
        // Tracee exited unexpectedly
        return;
      }
      ASSERT(t, !t->ptrace_event());
      if (t->stop_sig() == syscallbuf_desched_sig()) {
        continue;
      }
      if (t->stop_sig() == SIGTRAP &&
          is_kernel_trap(t->get_siginfo().si_code)) {
        // Hit the breakpoint
        break;
      }
      t->stash_sig();
    }
    t->vm()->remove_breakpoint(ret_addr, BKPT_INTERNAL);
    ASSERT(t, t->regs().ip().undo_executed_bkpt(t->arch()) == ret_addr);
    // Now that we're in a sane state, ask the Monkeypatcher to try and patch
    // that.
    bool patch_ok =
        t->vm()->monkeypatcher().try_patch_vsyscall_caller(t, ret_addr);
    ASSERT(t, patch_ok)
        << "The tracee issued a vsyscall to " << ip
        << " but we failed to monkeypatch the caller (return address "
        << ret_addr << ", sp=" << sp
        << "). Recording will not succeed. Exiting.";
    // Reset to the start of the region and continue
    regs = t->regs();
    regs.set_ip(ret_addr.decrement_by_vsyscall_entry_length(t->arch()));
    t->set_regs(regs);
    // We patched this syscall, record that
    auto ev = Event::patch_syscall();
    ev.PatchSyscall().patch_vsyscall = true;
    t->record_event(ev);
    step_state->continue_type = RecordSession::CONTINUE;
    return;
  }
  int syscallno = t->regs().original_syscallno();
  if (syscallno < 0) {
    // negative syscall numbers after a SECCOMP event
    // are treated as "skip this syscall". There will be one syscall event
    // reported instead of two. So fake an enter-syscall event now.
    // It doesn't really matter what the syscall-arch is.
    t->canonicalize_regs(t->arch());
    if (syscall_seccomp_ordering_ == SECCOMP_BEFORE_PTRACE_SYSCALL) {
      // If the ptrace entry stop hasn't happened yet, we're at a weird
      // intermediate state where the behavior of the next PTRACE_SYSCALL
      // will depend on the register state (i.e. whether we see an entry
      // trap or proceed right to the exit trap). To make things easier
      // on the rest of the system, do a fake syscall entry, then reset
      // the register state.
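      // A sketch of the fake entry performed here (this restates the code
      // below): temporarily point original_syscallno at gettid, do one
      // PTRACE_SYSCALL step so the pending entry trap is consumed by a
      // harmless syscall, then restore the original registers.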
      Registers orig_regs = t->regs();
      Registers r = orig_regs;
      r.set_original_syscallno(syscall_number_for_gettid(t->arch()));
      t->set_regs(r);
      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT,
                               RESUME_NO_TICKS)) {
        // Tracee died unexpectedly. We did not enter a syscall.
        // We shouldn't try to resume it now.
        step_state->continue_type = RecordSession::DONT_CONTINUE;
        return;
      }
      t->set_regs(orig_regs);
    }
    // Don't continue yet. At the next iteration of record_step, we'll
    // enter syscall_state_changed and that will trigger a continue to
    // the syscall exit.
    step_state->continue_type = RecordSession::DONT_CONTINUE;
    if (!process_syscall_entry(t, step_state, result, t->arch())) {
      return;
    }
    *did_enter_syscall = true;
    return;
  }

  if (syscall_seccomp_ordering_ == SECCOMP_BEFORE_PTRACE_SYSCALL) {
    // The next continue needs to be a PTRACE_SYSCALL to observe
    // the enter-syscall event.
    step_state->continue_type = RecordSession::CONTINUE_SYSCALL;
  } else {
    ASSERT(t, syscall_seccomp_ordering_ == PTRACE_SYSCALL_BEFORE_SECCOMP);
    if (t->ev().is_syscall_event() &&
        t->ev().Syscall().state == PROCESSING_SYSCALL) {
      // We did PTRACE_SYSCALL and already saw a syscall trap. Just ignore
      // this.
      LOG(debug) << "Ignoring SECCOMP syscall trap since we already got a "
                    "PTRACE_SYSCALL trap";
      // The next continue needs to be a PTRACE_SYSCALL to observe
      // the exit-syscall event.
      step_state->continue_type = RecordSession::CONTINUE_SYSCALL;
      // Need to restore last_task_switchable since it will have been
      // reset to PREVENT_SWITCH
      last_task_switchable = t->ev().Syscall().switchable;
    } else {
      // We've already passed the PTRACE_SYSCALL trap for syscall entry, so
      // we need to handle that now.
      SupportedArch syscall_arch = t->detect_syscall_arch();
      t->canonicalize_regs(syscall_arch);
      if (!process_syscall_entry(t, step_state, result, syscall_arch)) {
        step_state->continue_type = RecordSession::DONT_CONTINUE;
        return;
      }
      *did_enter_syscall = true;
    }
  }
}

static void seccomp_trap_done(RecordTask* t) {
  t->pop_seccomp_trap();

  // It's safe to reset the syscall buffer now.
  t->delay_syscallbuf_reset_for_seccomp_trap = false;

  t->write_and_record(REMOTE_PTR_FIELD(t->syscallbuf_child,
                                       failed_during_preparation),
                      (uint8_t)1);

  if (EV_DESCHED == t->ev().type()) {
    // Desched processing will do the rest for us
    return;
  }

  // Abort the current syscallbuf record, which corresponds to the syscall
  // that wasn't actually executed due to seccomp.
  t->write_mem(REMOTE_PTR_FIELD(t->syscallbuf_child, abort_commit),
               (uint8_t)1);
  t->record_event(Event::syscallbuf_abort_commit());

  // In fact, we need to run the syscall exit hook: that will ensure we
  // reset the buffer before we try to buffer another syscall.
  t->write_mem(
      REMOTE_PTR_FIELD(t->syscallbuf_child, notify_on_syscall_hook_exit),
      (uint8_t)1);
}

static void handle_seccomp_trap(RecordTask* t,
                                RecordSession::StepState* step_state,
                                uint16_t seccomp_data) {
  // The architecture may be wrong, but that's ok, because an actual syscall
  // entry did happen, so the registers are already updated according to the
  // architecture of the system call.
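  // Both this handler and handle_seccomp_errno below share an idiom (our
  // reading of the code): overwrite original_syscallno with
  // SECCOMP_MAGIC_SKIP_ORIGINAL_SYSCALLNO so the kernel skips executing the
  // syscall, then synthesize the filter's user-visible effect ourselves
  // (a SIGSYS here, an errno return value there).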
t->canonicalize_regs(t->detect_syscall_arch()); t->apply_syscall_entry_regs(); Registers r = t->regs(); int syscallno = r.original_syscallno(); // Cause kernel processing to skip the syscall r.set_original_syscallno(SECCOMP_MAGIC_SKIP_ORIGINAL_SYSCALLNO); t->set_regs(r); bool syscall_entry_already_recorded = false; if (t->ev().is_syscall_event()) { // A syscall event was already pushed, probably because we did a // PTRACE_SYSCALL to enter the syscall during handle_desched_event. Cancel // that event now since the seccomp SIGSYS aborts it completely. ASSERT(t, t->ev().Syscall().number == syscallno); // Make sure any prepared syscall state is discarded and any temporary // effects (e.g. redirecting pointers to scratch) undone. rec_abort_prepared_syscall(t); if (t->ev().type() == EV_SYSCALL_INTERRUPTION) { // The event could be a syscall-interruption if it was pushed by // `handle_desched_event`. In that case, it has not been recorded yet. t->pop_syscall_interruption(); } else { t->pop_syscall(); syscall_entry_already_recorded = true; } } if (t->is_in_untraced_syscall()) { ASSERT(t, !t->delay_syscallbuf_reset_for_seccomp_trap); // Don't reset the syscallbuf immediately after delivering the trap. We have // to wait until this buffered syscall aborts completely before resetting // the buffer. t->delay_syscallbuf_reset_for_seccomp_trap = true; t->push_event(Event::seccomp_trap()); // desched may be armed but we're not going to execute the syscall, let // alone block. If it fires, ignore it. t->write_mem( REMOTE_PTR_FIELD(t->syscallbuf_child, desched_signal_may_be_relevant), (uint8_t)0); } t->push_syscall_event(syscallno); t->ev().Syscall().failed_during_preparation = true; note_entering_syscall(t); if (t->is_in_untraced_syscall() && !syscall_entry_already_recorded) { t->record_current_event(); } // Use NativeArch here because different versions of system headers // have inconsistent field naming. union { NativeArch::siginfo_t native_api; siginfo_t linux_api; } si; memset(&si, 0, sizeof(si)); si.native_api.si_signo = SIGSYS; si.native_api.si_errno = seccomp_data; si.native_api.si_code = SYS_SECCOMP; si.native_api._sifields._sigsys._arch = to_audit_arch(r.arch()); si.native_api._sifields._sigsys._syscall = syscallno; // Documentation says that si_call_addr is the address of the syscall // instruction, but in tests it's immediately after the syscall // instruction. si.native_api._sifields._sigsys._call_addr = t->ip().to_data_ptr(); LOG(debug) << "Synthesizing " << si.linux_api; t->stash_synthetic_sig(si.linux_api, DETERMINISTIC_SIG); // Tests show that the current registers are preserved (on x86, eax/rax // retains the syscall number). r.set_syscallno(syscallno); t->set_regs(r); t->maybe_restore_original_syscall_registers(); if (t->is_in_untraced_syscall()) { // For buffered syscalls, go ahead and record the exit state immediately. t->ev().Syscall().state = EXITING_SYSCALL; t->record_current_event(); t->pop_syscall(); // The tracee is currently in the seccomp ptrace-stop. Advance it to the // syscall-exit stop so that when we try to deliver the SIGSYS via // PTRACE_SINGLESTEP, that doesn't trigger a SIGTRAP stop. // If this fails, that's fine, we're not going to deliver the SIGSYS. t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS); } // Don't continue yet. At the next iteration of record_step, if we // recorded the syscall-entry we'll enter syscall_state_changed and // that will trigger a continue to the syscall exit. 
If we recorded the // syscall-exit we'll go straight into signal delivery. step_state->continue_type = RecordSession::DONT_CONTINUE; } static void handle_seccomp_errno(RecordTask* t, RecordSession::StepState* step_state, uint16_t seccomp_data) { t->canonicalize_regs(t->detect_syscall_arch()); Registers r = t->regs(); int syscallno = r.original_syscallno(); // Cause kernel processing to skip the syscall r.set_original_syscallno(SECCOMP_MAGIC_SKIP_ORIGINAL_SYSCALLNO); t->set_regs(r); if (!t->is_in_untraced_syscall()) { t->push_syscall_event(syscallno); // Note that the syscall failed. prepare_clone() needs to know // this during replay of the syscall entry. t->ev().Syscall().failed_during_preparation = true; note_entering_syscall(t); } r.set_syscall_result(-seccomp_data); t->set_regs(r); // Don't continue yet. At the next iteration of record_step, if we // recorded the syscall-entry we'll enter syscall_state_changed and // that will trigger a continue to the syscall exit. step_state->continue_type = RecordSession::DONT_CONTINUE; } bool RecordSession::handle_ptrace_event(RecordTask** t_ptr, StepState* step_state, RecordResult* result, bool* did_enter_syscall) { *did_enter_syscall = false; RecordTask* t = *t_ptr; if (t->status().group_stop() || t->has_stashed_group_stop()) { t->clear_stashed_group_stop(); last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; return true; } int event = t->ptrace_event(); if (!event) { return false; } LOG(debug) << " " << t->tid << ": handle_ptrace_event " << ptrace_event_name(event) << ": event " << t->ev(); switch (event) { case PTRACE_EVENT_SECCOMP_OBSOLETE: case PTRACE_EVENT_SECCOMP: { if (syscall_seccomp_ordering_ == PTRACE_SYSCALL_BEFORE_SECCOMP_UNKNOWN) { syscall_seccomp_ordering_ = SECCOMP_BEFORE_PTRACE_SYSCALL; } int seccomp_data = t->get_ptrace_eventmsg_seccomp_data(); // We need to set the orig_* values before we let the process continue to exit // since the handler for the exit event will need them. // See `handle_ptrace_exit_event` above. t->apply_syscall_entry_regs(); if (seccomp_data < 0) { // Process just died. Urk. Just wait for the exit event and pretend this stop never happened! last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; return true; } int syscallno = t->regs().original_syscallno(); if (seccomp_data == SECCOMP_RET_DATA) { LOG(debug) << " traced syscall entered: " << syscall_name(syscallno, t->arch()); handle_seccomp_traced_syscall(t, step_state, result, did_enter_syscall); } else { // Note that we make no attempt to patch the syscall site when the // user handle does not return ALLOW. Apart from the ERRNO case, // handling these syscalls is necessarily slow anyway. 
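      // For reference (values from <linux/seccomp.h>): the filter's 32-bit
      // return value is an action in the high bits plus SECCOMP_RET_DATA
      // (0x0000ffff) in the low 16 bits, e.g. SECCOMP_RET_ERRNO | EPERM ==
      // 0x00050001. The masking below splits the two halves apart.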
uint32_t real_result; if (!seccomp_filter_rewriter().map_filter_data_to_real_result( t, seccomp_data, &real_result)) { LOG(debug) << "Process terminated unexpectedly during PTRACE_GETEVENTMSG"; step_state->continue_type = RecordSession::CONTINUE; break; } uint16_t real_result_data = real_result & SECCOMP_RET_DATA; switch (real_result & SECCOMP_RET_ACTION) { case SECCOMP_RET_TRAP: LOG(debug) << " seccomp trap for syscall: " << syscall_name(syscallno, t->arch()); handle_seccomp_trap(t, step_state, real_result_data); break; case SECCOMP_RET_ERRNO: LOG(debug) << " seccomp errno " << errno_name(real_result_data) << " for syscall: " << syscall_name(syscallno, t->arch()); handle_seccomp_errno(t, step_state, real_result_data); break; case SECCOMP_RET_KILL: LOG(debug) << " seccomp kill for syscall: " << syscall_name(syscallno, t->arch()); t->tgkill(SIGKILL); // Rely on the SIGKILL to bump us out of the ptrace stop. step_state->continue_type = RecordSession::DONT_CONTINUE; // Now wait for us to actually exit our ptrace-stop and proceed // to the PTRACE_EVENT_EXIT. This avoids the race where our // PTRACE_CONT might kick us out of the PTRACE_EVENT_EXIT before // we can process it. // If this fails because of *another* SIGKILL that's fine. t->wait(); break; default: ASSERT(t, false) << "Seccomp result not handled"; break; } } break; } case PTRACE_EVENT_EXEC: { if (t->thread_group()->task_set().size() > 1) { // All tasks but the task that did the execve should have exited by // now and notified us of their exits. However, it's possible that // while running the thread-group leader, our PTRACE_CONT raced with its // PTRACE_EVENT_EXIT and it exited, and the next event we got is this // PTRACE_EVENT_EXEC after the exec'ing task changed its tid to the // leader's tid. Or maybe there are kernel bugs; on // 4.2.0-42-generic running exec_from_other_thread, we reproducibly // enter PTRACE_EVENT_EXEC for the thread-group leader without seeing // its PTRACE_EVENT_EXIT. // So, record this task's exit and destroy it. // XXX We can't do record_robust_futex_changes here because the address // space has already gone. That would only matter if some of them were // in memory accessible to another process even after exec, i.e. a // shared-memory mapping or two different thread-groups sharing the same // address space. pid_t tid = t->rec_tid; WaitStatus status = t->status(); record_exit_trace_event(t, WaitStatus(0)); t->record_exit_event(); // Don't call RecordTask::destroy() because we don't want to // PTRACE_DETACH. delete t; // Steal the exec'ing task and make it the thread-group leader, and // carry on! t = revive_task_for_exec(tid); scheduler().set_current(t); *t_ptr = t; // Tell t that it is actually stopped, because the stop we got is really // for this task, not the old dead task. if (!t->did_waitpid(status)) { // This is totally untested and almost certainly broken, but if the // task was SIGKILLed out of the EXEC stop then we should probably // just pretend the exec never happened. step_state->continue_type = CONTINUE_SYSCALL; break; } } t->post_exec(); t->session().scheduler().did_exit_execve(t); // Forward ptrace exec notification if (t->emulated_ptracer) { if (t->emulated_ptrace_options & PTRACE_O_TRACEEXEC) { t->emulate_ptrace_stop( WaitStatus::for_ptrace_event(PTRACE_EVENT_EXEC)); } else if (!t->emulated_ptrace_seized) { // Inject legacy SIGTRAP-after-exec t->tgkill(SIGTRAP); } } if (t->emulated_stop_pending) { step_state->continue_type = DONT_CONTINUE; } else { // Skip past the ptrace event. 
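        // (Our reading: CONTINUE_SYSCALL maps to RESUME_SYSCALL, i.e.
        // PTRACE_SYSCALL, in task_continue() below, so the task merely steps
        // off the exec stop and stops again at the next syscall boundary.)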
        step_state->continue_type = CONTINUE_SYSCALL;
      }
      break;
    }

    default:
      ASSERT(t, false) << "Unhandled ptrace event " << ptrace_event_name(event)
                       << "(" << event << ")";
      break;
  }

  return true;
}

static void debug_exec_state(const char* msg, RecordTask* t) {
  LOG(debug) << msg << ": status=" << t->status();
}

template <typename Arch>
static bool is_ptrace_any_singlestep_arch(int command) {
  return command >= 0 &&
         (command == PTRACE_SINGLESTEP ||
          command == Arch::PTRACE_SYSEMU_SINGLESTEP);
}

static bool is_ptrace_any_singlestep(SupportedArch arch, int command) {
  RR_ARCH_FUNCTION(is_ptrace_any_singlestep_arch, arch, command);
}

void RecordSession::task_continue(const StepState& step_state) {
  RecordTask* t = scheduler().current();
  ASSERT(t, step_state.continue_type != DONT_CONTINUE);
  // A task in an emulated ptrace-stop must really stay stopped
  ASSERT(t, !t->emulated_stop_pending);

  bool may_restart = t->at_may_restart_syscall();

  if (may_restart && t->seccomp_bpf_enabled) {
    LOG(debug) << "  PTRACE_SYSCALL to possibly-restarted " << t->ev();
  }

  if (!t->vm()->first_run_event()) {
    t->vm()->set_first_run_event(trace_writer().time());
  }
  if (!t->thread_group()->first_run_event()) {
    t->thread_group()->set_first_run_event(trace_writer().time());
  }

  TicksRequest ticks_request;
  ResumeRequest resume;
  if (step_state.continue_type == CONTINUE_SYSCALL) {
    ticks_request = RESUME_NO_TICKS;
    resume = RESUME_SYSCALL;
  } else {
    if (t->has_stashed_sig(PerfCounters::TIME_SLICE_SIGNAL)) {
      // timeslice signal already stashed, no point in generating another one
      // (and potentially slow)
      ticks_request = RESUME_UNLIMITED_TICKS;
    } else if (scheduler().may_use_unlimited_ticks()) {
      ticks_request = RESUME_UNLIMITED_TICKS;
    } else {
      ticks_request = (TicksRequest)max<Ticks>(
          0, scheduler().current_timeslice_end() - t->tick_count());
    }

    // Clear any lingering state, then see if we need to stop earlier for a
    // tracee-requested pmc interrupt on the virtualized performance counter.
    t->next_pmc_interrupt_is_for_user = false;
    if (auto vpmc =
            VirtualPerfCounterMonitor::interrupting_virtual_pmc_for_task(t)) {
      ASSERT(t, vpmc->target_tuid() == t->tuid());

      Ticks after = max<Ticks>(vpmc->target_ticks() - t->tick_count(), 0);
      if ((uint64_t)after < (uint64_t)ticks_request) {
        LOG(debug) << "ticks_request constrained from " << ticks_request
                   << " to " << after << " for vpmc";
        ticks_request = (TicksRequest)after;
        t->next_pmc_interrupt_is_for_user = true;
      }
    }

    // Override requested by the tracee for testing purposes
    if (t->tick_request_override != (TicksRequest)0) {
      ASSERT(t, !t->next_pmc_interrupt_is_for_user);
      ticks_request = t->tick_request_override;
      t->tick_request_override = (TicksRequest)0;
    }

    bool singlestep =
        is_ptrace_any_singlestep(t->arch(), t->emulated_ptrace_cont_command);
    if (singlestep && is_at_syscall_instruction(t, t->ip())) {
      // We're about to singlestep into a syscall instruction.
      // Act like we're NOT singlestepping since doing a PTRACE_SINGLESTEP
      // would skip over the system call.
      LOG(debug)
          << "Clearing singlestep because we're about to enter a syscall";
      singlestep = false;
    }
    if (singlestep) {
      resume = RESUME_SINGLESTEP;
    } else {
      /* We won't receive PTRACE_EVENT_SECCOMP events until
       * the seccomp filter is installed by the
       * syscall_buffer lib in the child, therefore we must
       * record in the traditional way (with PTRACE_SYSCALL)
       * until it is installed. */
      /* Kernel commit
         https://github.com/torvalds/linux/commit/93e35efb8de45393cf61ed07f7b407629bf698ea
         makes PTRACE_SYSCALL traps be delivered *before* seccomp RET_TRACE
         traps. Detect and handle this.
*/ if (!t->seccomp_bpf_enabled || may_restart || syscall_seccomp_ordering_ == PTRACE_SYSCALL_BEFORE_SECCOMP_UNKNOWN) { resume = RESUME_SYSCALL; } else { /* When the seccomp filter is on, instead of capturing * syscalls by using PTRACE_SYSCALL, the filter will * generate the ptrace events. This means we allow the * process to run using PTRACE_CONT, and rely on the * seccomp filter to generate the special * PTRACE_EVENT_SECCOMP event once a syscall happens. * This event is handled here by simply allowing the * process to continue to the actual entry point of * the syscall (using cont_syscall_block()) and then * using the same logic as before. */ resume = RESUME_CONT; } } } t->resume_execution(resume, RESUME_NONBLOCKING, ticks_request); } /** * Step |t| forward until the tracee syscall that disarms the desched * event. If a signal becomes pending in the interim, we stash it. * This allows the caller to deliver the signal after this returns. * (In reality the desched event will already have been disarmed before we * enter this function.) */ static void advance_to_disarm_desched_syscall(RecordTask* t) { int old_sig = 0; LOG(debug) << "desched: DISARMING_DESCHED_EVENT"; /* TODO: send this through main loop. */ /* TODO: mask off signals and avoid this loop. */ do { if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS)) { return; } if (t->status().is_syscall()) { t->apply_syscall_entry_regs(); } /* We can safely ignore TIME_SLICE_SIGNAL while trying to * reach the disarm-desched ioctl: once we reach it, * the desched'd syscall will be "done" and the tracee * will be at a preemption point. In fact, we *want* * to ignore this signal. Syscalls like read() can * have large buffers passed to them, and we have to * copy-out the buffered out data to the user's * buffer. This happens in the interval where we're * reaching the disarm-desched ioctl, so that code is * susceptible to receiving TIME_SLICE_SIGNAL. */ int sig = t->stop_sig(); if (PerfCounters::TIME_SLICE_SIGNAL == sig) { continue; } // We should not receive SYSCALLBUF_DESCHED_SIGNAL since it should already // have been disarmed. However, we observe these being received here when // we arm the desched signal before we restart a blocking syscall, which // completes successfully, then we disarm, then we see a desched signal // here. if (t->session().syscallbuf_desched_sig() == sig) { continue; } if (sig && sig == old_sig) { LOG(debug) << " coalescing pending " << signal_name(sig); continue; } if (sig) { LOG(debug) << " " << signal_name(sig) << " now pending"; t->stash_sig(); } } while (!t->is_disarm_desched_event_syscall()); // Exit the syscall. If this fails, that's fine, we can ignore it. t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS); } /** * |t| is at a desched event and some relevant aspect of its state * changed. (For now, changes except the original desched'd syscall * being restarted.) */ void RecordSession::desched_state_changed(RecordTask* t) { LOG(debug) << "desched: IN_SYSCALL"; /* We need to ensure that the syscallbuf code doesn't * try to commit the current record; we've already * recorded that syscall. The following event sets * the abort-commit bit. 
*/ t->write_mem(REMOTE_PTR_FIELD(t->syscallbuf_child, abort_commit), (uint8_t)1); t->record_event(Event::syscallbuf_abort_commit()); advance_to_disarm_desched_syscall(t); t->pop_desched(); /* The tracee has just finished sanity-checking the * aborted record, and won't touch the syscallbuf * during this (aborted) transaction again. So now * is a good time for us to reset the record counter. */ t->delay_syscallbuf_reset_for_desched = false; // Run the syscallbuf exit hook. This ensures we'll be able to reset // the syscallbuf before trying to buffer another syscall. t->write_mem( REMOTE_PTR_FIELD(t->syscallbuf_child, notify_on_syscall_hook_exit), (uint8_t)1); } static void syscall_not_restarted(RecordTask* t) { LOG(debug) << " " << t->tid << ": popping abandoned interrupted " << t->ev() << "; pending events:"; if (IS_LOGGING(debug)) { t->log_pending_events(); } t->pop_syscall_interruption(); } /** * "Thaw" a frozen interrupted syscall if |t| is restarting it. * Return true if a syscall is indeed restarted. * * A postcondition of this function is that |t->ev| is no longer a * syscall interruption, whether or whether not a syscall was * restarted. */ static bool maybe_restart_syscall(RecordTask* t) { if (is_restart_syscall_syscall(t->regs().original_syscallno(), t->arch())) { LOG(debug) << " " << t->tid << ": SYS_restart_syscall'ing " << t->ev(); } if (t->is_syscall_restart()) { t->ev().transform(EV_SYSCALL); Registers regs = t->regs(); regs.set_original_syscallno(t->ev().Syscall().regs.original_syscallno()); t->set_regs(regs); t->canonicalize_regs(t->arch()); return true; } if (EV_SYSCALL_INTERRUPTION == t->ev().type()) { syscall_not_restarted(t); } return false; } /** * After a SYS_sigreturn "exit" of task |t| with return value |ret|, * check to see if there's an interrupted syscall that /won't/ be * restarted, and if so, pop it off the pending event stack. */ static void maybe_discard_syscall_interruption(RecordTask* t, intptr_t ret) { int syscallno; if (EV_SYSCALL_INTERRUPTION != t->ev().type()) { /* We currently don't track syscalls interrupted with * ERESTARTSYS or ERESTARTNOHAND, so it's possible for * a sigreturn not to affect the event stack. */ LOG(debug) << " (no interrupted syscall to retire)"; return; } syscallno = t->ev().Syscall().number; if (0 > ret) { syscall_not_restarted(t); } else if (t->arch() == x86 || t->arch() == x86_64) { // On x86, we would have expected this to get restored to the syscallno. // Since the syscallno is in a different register on other platforms, this // assert does not apply. ASSERT(t, syscallno == ret) << "Interrupted call was " << t->ev().Syscall().syscall_name() << " and sigreturn claims to be restarting " << syscall_name(ret, t->ev().Syscall().arch()); } } /** * Copy the registers used for syscall arguments (not including * syscall number) from |from| to |to|. */ static void copy_syscall_arg_regs(Registers* to, const Registers& from) { to->set_orig_arg1(from.arg1()); to->set_arg2(from.arg2()); to->set_arg3(from.arg3()); to->set_arg4(from.arg4()); to->set_arg5(from.arg5()); to->set_arg6(from.arg6()); } static void maybe_trigger_emulated_ptrace_syscall_exit_stop(RecordTask* t) { if (t->emulated_ptrace_cont_command == PTRACE_SYSCALL) { t->emulate_ptrace_stop(WaitStatus::for_syscall(t), SYSCALL_EXIT_STOP); } else if (is_ptrace_any_singlestep(t->arch(), t->emulated_ptrace_cont_command)) { // Deliver the singlestep trap now that we've finished executing the // syscall. 
    t->emulate_ptrace_stop(WaitStatus::for_stop_sig(SIGTRAP),
                           SIGNAL_DELIVERY_STOP, nullptr, SI_KERNEL);
  }
}

static void save_interrupted_syscall_ret_in_syscallbuf(RecordTask* t,
                                                       intptr_t retval) {
  // Record storing the return value in the syscallbuf record, where
  // we expect to find it during replay.
  auto child_rec = t->next_syscallbuf_record();
  // Also store it there now so that our memory checksums are correct.
  // It will be overwritten by the tracee's syscallbuf code.
  t->write_and_record(REMOTE_PTR_FIELD(child_rec, ret),
                      static_cast<int64_t>(retval));
}

static bool is_in_privileged_syscall(RecordTask* t) {
  auto type = AddressSpace::rr_page_syscall_from_exit_point(t->arch(), t->ip());
  return type && type->privileged == AddressSpace::PRIVILEGED;
}

void RecordSession::syscall_state_changed(RecordTask* t,
                                          StepState* step_state) {
  switch (t->ev().Syscall().state) {
    case ENTERING_SYSCALL_PTRACE:
      debug_exec_state("EXEC_SYSCALL_ENTRY_PTRACE", t);
      step_state->continue_type = DONT_CONTINUE;
      last_task_switchable = ALLOW_SWITCH;
      if (t->emulated_stop_type != NOT_STOPPED) {
        // Don't go any further.
        return;
      }
      if (t->ev().Syscall().in_sysemu) {
        // We'll have recorded just the ENTERING_SYSCALL_PTRACE event and
        // nothing else. Resume with an invalid syscall to ensure no real
        // syscall runs.
        t->pop_syscall();
        Registers r = t->regs();
        Registers orig_regs = r;
        r.set_original_syscallno(-1);
        t->set_regs(r);
        // If this fails because of premature exit, don't mess with the
        // task anymore.
        if (t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT,
                                RESUME_NO_TICKS)) {
          ASSERT(t, t->ip() == r.ip());
          t->set_regs(orig_regs);
          maybe_trigger_emulated_ptrace_syscall_exit_stop(t);
        }
        return;
      }
      last_task_switchable = PREVENT_SWITCH;
      t->ev().Syscall().regs = t->regs();
      t->ev().Syscall().state = ENTERING_SYSCALL;
      // The syscallno may have been changed by the ptracer
      t->ev().Syscall().number = t->regs().original_syscallno();
      return;

    case ENTERING_SYSCALL: {
      debug_exec_state("EXEC_SYSCALL_ENTRY", t);
      ASSERT(t, !t->emulated_stop_pending);

      // Flush syscallbuf now so that anything recorded by
      // rec_prepare_syscall is associated with the syscall event
      t->maybe_flush_syscallbuf();
      last_task_switchable = t->ev().Syscall().switchable =
          rec_prepare_syscall(t);
      t->record_event(t->ev(), RecordTask::DONT_FLUSH_SYSCALLBUF,
                      RecordTask::ALLOW_RESET_SYSCALLBUF,
                      &t->ev().Syscall().regs);
      debug_exec_state("after cont", t);
      t->ev().Syscall().state = PROCESSING_SYSCALL;

      if (t->emulated_stop_pending) {
        step_state->continue_type = DONT_CONTINUE;
      } else {
        // Resume the syscall execution in the kernel context.
        step_state->continue_type = CONTINUE_SYSCALL;
      }

      if (t->session().done_initial_exec() &&
          Flags::get().check_cached_mmaps) {
        t->vm()->verify(t);
      }

      if (t->desched_rec() && t->is_in_untraced_syscall() &&
          t->has_stashed_sig()) {
        // We have a signal to deliver but we're about to (re?)enter an
        // untraced syscall that may block and the desched event has been
        // disarmed. Rearm the desched event so if the syscall blocks, it
        // will be interrupted and we'll have a chance to deliver our signal.
        LOG(debug)
            << "Rearming desched event so we'll get a chance to deliver "
               "stashed signal";
        arm_desched_event(t);
      }

      if (t->detached_proxy) {
        // We detached. Record that.
t->record_event(Event::exit(), RecordTask::DONT_FLUSH_SYSCALLBUF, RecordTask::DONT_RESET_SYSCALLBUF); t->session().trace_writer().write_task_event( TraceTaskEvent::for_detach(t->tid)); step_state->continue_type = DONT_CONTINUE; } return; } case PROCESSING_SYSCALL: debug_exec_state("EXEC_IN_SYSCALL", t); // Linux kicks tasks out of syscalls before delivering // signals. ASSERT(t, !t->stop_sig()) << "Signal " << signal_name(t->stop_sig()) << " pending while in syscall???"; t->ev().Syscall().state = EXITING_SYSCALL; step_state->continue_type = DONT_CONTINUE; return; case EXITING_SYSCALL: { debug_exec_state("EXEC_SYSCALL_DONE", t); DEBUG_ASSERT(t->stop_sig() == 0); SupportedArch syscall_arch = t->ev().Syscall().arch(); int syscallno = t->ev().Syscall().number; intptr_t retval = t->regs().syscall_result_signed(); if (t->desched_rec()) { // If we enabled the desched event above, disable it. disarm_desched_event(t); // Write syscall return value to the syscallbuf now. This lets replay // get the correct value even though we're aborting the commit. This // value affects register values in the preload code (which must be // correct since register values may escape). save_interrupted_syscall_ret_in_syscallbuf(t, retval); } // sigreturn is a special snowflake, because it // doesn't actually return. Instead, it undoes the // setup for signal delivery, which possibly includes // preparing the tracee for a restart-syscall. So we // take this opportunity to possibly pop an // interrupted-syscall event. if (is_sigreturn(syscallno, syscall_arch)) { if (is_x86ish(t->arch())) { ASSERT(t, t->regs().original_syscallno() == -1); } rec_did_sigreturn(t); t->record_current_event(); t->pop_syscall(); // We've finished processing this signal now. t->pop_signal_handler(); t->invalidate_sigmask(); maybe_discard_syscall_interruption(t, retval); if (EV_SECCOMP_TRAP == t->ev().type()) { LOG(debug) << " exiting seccomp trap"; save_interrupted_syscall_ret_in_syscallbuf(t, retval); seccomp_trap_done(t); } if (EV_DESCHED == t->ev().type()) { LOG(debug) << " exiting desched critical section"; // The signal handler could have modified the apparent syscall // return handler. Save that value into the syscall buf again so // replay will pick it up later. save_interrupted_syscall_ret_in_syscallbuf(t, retval); desched_state_changed(t); } } else { LOG(debug) << " original_syscallno:" << t->regs().original_syscallno() << " (" << syscall_name(syscallno, syscall_arch) << "); return val:" << HEX(t->regs().syscall_result()); /* a syscall_restart ending is equivalent to the * restarted syscall ending */ if (t->ev().Syscall().is_restart) { LOG(debug) << " exiting restarted " << syscall_name(syscallno, syscall_arch); } /* TODO: is there any reason a restart_syscall can't * be interrupted by a signal and itself restarted? */ bool may_restart = !is_restart_syscall_syscall(syscallno, t->arch()) // SYS_pause is either interrupted or // never returns. It doesn't restart. 
&& !is_pause_syscall(syscallno, t->arch()) && t->regs().syscall_may_restart(); /* no need to process the syscall in case its * restarted this will be done in the exit from the * restart_syscall */ if (!may_restart) { rec_process_syscall(t); if (t->session().done_initial_exec() && Flags::get().check_cached_mmaps) { t->vm()->verify(t); } } else { LOG(debug) << " may restart " << syscall_name(syscallno, syscall_arch) << " (from retval " << HEX(retval) << ")"; rec_prepare_restart_syscall(t); /* If we may restart this syscall, we've most * likely fudged some of the argument * registers with scratch pointers. We don't * want to record those fudged registers, * because scratch doesn't exist in replay. * So cover our tracks here. */ Registers r = t->regs(); copy_syscall_arg_regs(&r, t->ev().Syscall().regs); t->set_regs(r); // We need to track what the return value was on architectures // where the kernel replaces the return value by the new arg1 // on restart. t->ev().Syscall().regs = r; } t->record_current_event(); /* If we're not going to restart this syscall, we're * done with it. But if we are, "freeze" it on the * event stack until the execution point where it * might be restarted. */ if (!may_restart) { t->pop_syscall(); if (EV_DESCHED == t->ev().type()) { LOG(debug) << " exiting desched critical section"; desched_state_changed(t); } } else { t->ev().transform(EV_SYSCALL_INTERRUPTION); t->ev().Syscall().is_restart = true; } t->canonicalize_regs(syscall_arch); if (!may_restart) { if (t->retry_syscall_patching) { LOG(debug) << "Retrying deferred syscall patching"; t->retry_syscall_patching = false; if (t->vm()->monkeypatcher().try_patch_syscall(t, false)) { // Syscall was patched. Emit event and continue execution. auto ev = Event::patch_syscall(); ev.PatchSyscall().patch_after_syscall = true; t->record_event(ev); } } } } last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; if (!is_in_privileged_syscall(t)) { maybe_trigger_emulated_ptrace_syscall_exit_stop(t); } return; } default: FATAL() << "Unknown exec state " << t->ev().Syscall().state; } } /** If the perf counters seem to be working return, otherwise don't return. */ void RecordSession::check_initial_task_syscalls(RecordTask* t, RecordResult* step_result) { if (done_initial_exec()) { return; } if (is_write_syscall(t->ev().Syscall().number, t->arch()) && t->regs().arg1_signed() == -1) { Ticks ticks = t->tick_count(); LOG(debug) << "ticks on entry to dummy write: " << ticks; if (ticks == 0) { step_result->status = RecordSession::STEP_SPAWN_FAILED; step_result->failure_message = string( "rr internal recorder error: Performance counter doesn't seem to " "be working. 
          "Are you perhaps running rr in a VM but didn't enable "
          "perf-counter virtualization?");
    }
  }
  if (is_exit_group_syscall(t->ev().Syscall().number, t->arch())) {
    step_result->status = RecordSession::STEP_SPAWN_FAILED;
    step_result->failure_message = read_spawned_task_error();
  }
}

RecordTask* RecordSession::revive_task_for_exec(pid_t rec_tid) {
  unsigned long msg = 0;
  int ret =
      ptrace(_ptrace_request(PTRACE_GETEVENTMSG), rec_tid, nullptr, &msg);
  if (ret < 0) {
    FATAL() << "Can't get old tid for execve (leader=" << rec_tid << ")";
  }
  RecordTask* t = find_task(msg);
  if (!t) {
    FATAL() << "Can't find old task for execve";
  }
  ASSERT(t, rec_tid == t->tgid());
  pid_t own_namespace_tid = t->thread_group()->tgid_own_namespace;

  LOG(debug) << "Changing task tid from " << t->tid << " to " << rec_tid;

  // Pretend the old task cloned a new task with the right tid, and then
  // exited
  trace_writer().write_task_event(TraceTaskEvent::for_clone(
      rec_tid, t->tid, own_namespace_tid,
      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
          CLONE_SYSVSEM));
  trace_writer().write_task_event(
      TraceTaskEvent::for_exit(t->tid, WaitStatus::for_exit_code(0)));

  // Account for tid change
  task_map.erase(t->tid);
  task_map.insert(make_pair(rec_tid, t));

  // Update the serial as if this task was really created by cloning the old
  // task.
  t->set_tid_and_update_serial(rec_tid, own_namespace_tid);

  return t;
}

/**
 * Take a NativeArch::siginfo_t& here instead of siginfo_t because different
 * versions of system headers have inconsistent field naming.
 */
template <typename Arch>
static void setup_sigframe_siginfo_arch(RecordTask* t,
                                        const siginfo_t& siginfo) {
  remote_ptr<typename Arch::siginfo_t> dest;
  switch (Arch::arch()) {
    case x86: {
      auto p = t->regs().sp().cast<typename Arch::unsigned_word>() + 2;
      dest = t->read_mem(p);
      break;
    }
    case x86_64:
      dest = t->regs().si();
      break;
    case aarch64:
      dest = t->regs().x1();
      break;
    default:
      DEBUG_ASSERT(0 && "Unknown architecture");
      break;
  }
  typename Arch::siginfo_t si = t->read_mem(dest);
  set_arch_siginfo(siginfo, t->arch(), &si, sizeof(si));
  t->write_mem(dest, si);
}

static void setup_sigframe_siginfo(RecordTask* t, const siginfo_t& siginfo) {
  RR_ARCH_FUNCTION(setup_sigframe_siginfo_arch, t->arch(), t, siginfo);
}

/**
 * Get t into a state where resume_execution with a signal will actually work.
 */
static bool preinject_signal(RecordTask* t) {
  int sig = t->ev().Signal().siginfo.si_signo;

  /* Signal injection is tricky. Per the ptrace(2) man page, injecting
   * a signal while the task is not in a signal-stop is not guaranteed to
   * work (and indeed, we see that the kernel sometimes ignores such
   * signals). But some signals must be delayed until after the signal-stop
   * that notified us of them.
   * So, first we check if we're in a signal-stop that we can use to inject
   * a signal. Some (all?) SIGTRAP stops are *not* usable for signal
   * injection. */
  if (t->stop_sig() && t->stop_sig() != SIGTRAP) {
    LOG(debug) << "  in signal-stop for " << signal_name(t->stop_sig());
  } else {
    /* We're not in a usable signal-stop. Force a signal-stop by sending
     * a new signal with tgkill (as the ptrace(2) man page recommends). */
    LOG(debug) << "  maybe not in signal-stop (status " << t->status()
               << "); doing tgkill(SYSCALLBUF_DESCHED_SIGNAL)";
    if (!t->move_to_signal_stop()) {
      /* We raced with an exit (e.g. due to a pending SIGKILL).
       */
      return false;
    }
    ASSERT(t, t->stop_sig() == t->session().syscallbuf_desched_sig())
        << "Expected SYSCALLBUF_DESCHED_SIGNAL, got " << t->status();
    /* We're now in a signal-stop */
  }

  /* Now that we're in a signal-stop, we can inject our signal and advance
   * to the signal handler with one single-step. */
  LOG(debug) << "  injecting signal " << signal_name(sig);
  t->set_siginfo(t->ev().Signal().siginfo);
  return true;
}

/**
 * Returns true if the signal should be delivered.
 * Returns false if this signal should not be delivered because another signal
 * occurred during delivery or there was a premature exit.
 * Must call t->stashed_signal_processed() once we're ready to unmask signals.
 */
static bool inject_handled_signal(RecordTask* t) {
  if (!preinject_signal(t)) {
    // Task prematurely exited.
    return false;
  }
  // If there aren't any more stashed signals, it's OK to stop blocking all
  // signals.
  t->stashed_signal_processed();

  int sig = t->ev().Signal().siginfo.si_signo;
  do {
    // We are ready to inject our signal.
    // XXX we assume the kernel won't respond by notifying us of a different
    // signal. We don't want to do this with signals blocked because that
    // will save a bogus signal mask in the signal frame.
    if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT,
                             RESUME_NO_TICKS, sig)) {
      return false;
    }
    // Signal injection can change the sigmask due to sa_mask effects, lack
    // of SA_NODEFER, and signal frame construction triggering a synchronous
    // SIGSEGV.
    t->invalidate_sigmask();
    // Repeat injection if we got a desched signal. We observe in Linux
    // 4.14.12 that we get SYSCALLBUF_DESCHED_SIGNAL here once in a while.
  } while (t->stop_sig() == t->session().syscallbuf_desched_sig());

  if (t->stop_sig() == SIGSEGV) {
    // Constructing the signal handler frame must have failed. Stash the
    // signal to deliver it later.
    t->stash_sig();
    if (sig == SIGSEGV) {
      // The kernel will kill the process after this. Make sure we know to
      // treat it as fatal when we inject it. Also disable the signal handler
      // to match what the kernel does.
      t->did_set_sig_handler_default(SIGSEGV);
      t->thread_group()->received_sigframe_SIGSEGV = true;
    }
    return false;
  }

  // We stepped into a user signal handler.
  ASSERT(t, t->stop_sig() == SIGTRAP)
      << "Got unexpected status " << t->status() << " trying to deliver "
      << sig << " siginfo is " << t->get_siginfo();
  ASSERT(t, t->get_signal_user_handler(sig) == t->ip())
      << "Expected handler IP " << t->get_signal_user_handler(sig) << ", got "
      << t->ip()
      << "; actual signal mask=" << HEX(t->read_sigmask_from_process())
      << " (cached " << HEX(t->get_sigmask()) << ")";

  if (t->signal_handler_takes_siginfo(sig)) {
    // The kernel copied siginfo into userspace so it can pass a pointer to
    // the signal handler. Replace the contents of that siginfo with
    // the exact data we want to deliver. (We called Task::set_siginfo
    // above to set that data, but the kernel sanitizes the passed-in data
    // which wipes out certain fields; e.g. we can't set SI_KERNEL in
    // si_code.)
    setup_sigframe_siginfo(t, t->ev().Signal().siginfo);
  }

  // The kernel clears the FPU state on entering the signal handler, but
  // prior to 4.7 or thereabouts ptrace can still return stale values. Fix
  // that here. This also sets bit 0 of the XINUSE register to 1 to avoid
  // issues where it gets set to 1 nondeterministically.
  ExtraRegisters e = t->extra_regs();
  e.reset();
  t->set_extra_regs(e);

  return true;
}

/**
 * |t| is being delivered a signal, and its state changed.
 * Must call t->stashed_signal_processed() once we're ready to unmask signals.
*/ bool RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { int sig = t->ev().Signal().siginfo.si_signo; switch (t->ev().type()) { case EV_SIGNAL: { // This event is used by the replayer to advance to // the point of signal delivery. if (t->arch() == aarch64 && t->status().is_syscall() && t->prev_ev() && t->prev_ev()->type() == EV_SYSCALL_INTERRUPTION) { // On aarch64, replaying expects the signal to be delivered before // the syscall instruction but the current pc during recording // is after the syscall instruction with the arg1 clobbered // with the return value (aborted syscall). auto regs = t->regs(); auto &syscall_regs = t->prev_ev()->Syscall().regs; regs.set_ip(syscall_regs.ip().decrement_by_syscall_insn_length(t->arch())); regs.set_arg1(syscall_regs.orig_arg1()); t->record_event(t->ev(), RecordTask::FLUSH_SYSCALLBUF, RecordTask::ALLOW_RESET_SYSCALLBUF, ®s); } else { t->record_current_event(); } t->ev().transform(EV_SIGNAL_DELIVERY); ssize_t sigframe_size = 0; bool has_handler = t->signal_has_user_handler(sig); if (has_handler) { LOG(debug) << " " << t->tid << ": " << signal_name(sig) << " has user handler"; if (!inject_handled_signal(t)) { // Signal delivery isn't happening. Prepare to process the new // signal that aborted signal delivery. t->signal_delivered(sig); t->pop_signal_delivery(); step_state->continue_type = DONT_CONTINUE; last_task_switchable = PREVENT_SWITCH; break; } if (is_x86ish(t->arch())) { // It's somewhat difficult engineering-wise to // compute the sigframe size at compile time, // and it can vary across kernel versions and CPU // microarchitectures. So this size is an overestimate // of the real size(s). // // If this size becomes too small in the // future, and unit tests that use sighandlers // are run with checksumming enabled, then // they can catch errors here. sigframe_size = 1152 /* Overestimate of kernel sigframe */ + 128 /* Redzone */ + /* this returns 512 when XSAVE unsupported */ xsave_area_size(); } else if (t->arch() == aarch64) { sigframe_size = sizeof(ARM64Arch::rt_sigframe) + sizeof(ARM64Arch::user_fpsimd_state); } else { DEBUG_ASSERT(0 && "Add sigframe size for your architecture here"); } t->ev().transform(EV_SIGNAL_HANDLER); t->signal_delivered(sig); // We already continued! Don't continue now, and allow switching. step_state->continue_type = DONT_CONTINUE; last_task_switchable = ALLOW_SWITCH; } else { t->stashed_signal_processed(); LOG(debug) << " " << t->tid << ": no user handler for " << signal_name(sig); // Don't do another task continue. We want to deliver the signal // as the next thing that the task does. step_state->continue_type = DONT_CONTINUE; // If we didn't set up the sighandler frame, we need // to ensure that this tracee is scheduled next so // that we can deliver the signal normally. We have // to do that because setting up the sighandler frame // is synchronous, but delivery otherwise is async. // But right after this, we may have to process some // syscallbuf state, so we can't let the tracee race // with us. last_task_switchable = PREVENT_SWITCH; } // We record this data even if sigframe_size is zero to simplify replay. // Stop recording data if we run off the end of a writable mapping. // Our sigframe size is conservative so we need to do this. t->record_remote_writable(t->regs().sp(), sigframe_size); // This event is used by the replayer to set up the signal handler frame. // But if we don't have a handler, we don't want to record the event // until we deal with the EV_SIGNAL_DELIVERY. 
if (has_handler) { t->record_current_event(); } break; } case EV_SIGNAL_DELIVERY: { // A SIGSTOP requires us to allow switching to another task. // So does a fatal, core-dumping signal, since we need to allow other // tasks to proceed to their exit events. bool is_deterministic = t->ev().Signal().deterministic == DETERMINISTIC_SIG; // Signals that would normally be fatal are just ignored for init processes, // unless they're deterministic. bool is_fatal = t->ev().Signal().disposition == DISPOSITION_FATAL && (!t->is_container_init() || is_deterministic); Switchable can_switch = ((is_fatal && is_coredumping_signal(sig)) || sig == SIGSTOP) ? ALLOW_SWITCH : PREVENT_SWITCH; // We didn't record this event above, so do that now. // NB: If there is no handler, and we interrupted a syscall, and there are // no more actionable signals, the kernel sets us up for a syscall // restart. But it does that *after* the ptrace trap. To replay this // correctly we need to fake those changes here. But we don't do this // if we're going to switch away at the ptrace trap, and for the moment, // 'can_switch' is actually 'will_switch'. // This is essentially copied from do_signal in arch/x86/kernel/signal.c bool has_other_signals = t->has_any_actionable_signal(); auto r = t->regs(); if (!is_fatal) { Event *prev_ev = t->prev_ev(); if (can_switch == PREVENT_SWITCH && !has_other_signals && prev_ev && EV_SYSCALL_INTERRUPTION == prev_ev->type()) { switch (prev_ev->Syscall().regs.syscall_result_signed()) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: r.set_syscallno(r.original_syscallno()); break; case -ERESTART_RESTARTBLOCK: r.set_syscallno(syscall_number_for_restart_syscall(t->arch())); break; } // On aarch64, the kernel modifies the registers before the signal stop. // so we should not decrement the pc again or we'll rerun the instruction // before the syscall. // [1] https://github.com/torvalds/linux/blob/caffb99b6929f41a69edbb5aef3a359bf45f3315/arch/arm64/kernel/signal.c#L855-L862 if (t->arch() != aarch64) r.set_ip(r.ip().decrement_by_syscall_insn_length(t->arch())); // Now that we've mucked with the registers, we can't switch tasks. That // could allow more signals to be generated, breaking our assumption // that we are the last signal. } else { // But if we didn't touch the registers switching here is ok. can_switch = ALLOW_SWITCH; } } t->record_event(t->ev(), RecordTask::FLUSH_SYSCALLBUF, RecordTask::ALLOW_RESET_SYSCALLBUF, &r); // Don't actually set_regs(r), the kernel does these modifications. if (t->is_container_init() && is_fatal) { // Nondeterministic signals were already filtered out. ASSERT(t, is_deterministic); // Usually, the kernel removes the killable-protection from an init process // when a deterministic fatal signal gets executed, but (due to what is // arguably a bug) when a ptracer is attached, this does not happen. // If we try to inject it here, the kernel will just ignore it, // and we'll go around again. As a hack, we detach here, in the // expectation that the deterministic instruction will run again and // actually kill the task now that it isn't under ptrace control anymore. 
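// ---- editor's sketch (illustration only, not part of rr) -----------------
// The register fixup sketched from the syscall-restart logic above. The
// ERESTART* values are kernel-internal (not in userspace errno.h); FakeRegs
// and emulate_restart_x86 are hypothetical stand-ins for rr's Registers.
#include <cstdint>

constexpr long kERESTARTSYS = 512;
constexpr long kERESTARTNOINTR = 513;
constexpr long kERESTARTNOHAND = 514;
constexpr long kERESTART_RESTARTBLOCK = 516;

struct FakeRegs {
  long syscallno;
  long orig_syscallno;
  long syscall_result;
  uintptr_t ip;
};

static void emulate_restart_x86(FakeRegs& r, long sys_restart_syscall_nr) {
  switch (-r.syscall_result) {
    case kERESTARTNOHAND:
    case kERESTARTSYS:
    case kERESTARTNOINTR:
      r.syscallno = r.orig_syscallno;
      r.ip -= 2; // re-execute the 2-byte x86 'syscall' instruction
      break;
    case kERESTART_RESTARTBLOCK:
      r.syscallno = sys_restart_syscall_nr;
      r.ip -= 2; // on aarch64 the kernel already moved the pc, as noted above
      break;
    default:
      break; // not an interrupted syscall; nothing to do
  }
}
// ---- end editor's sketch --------------------------------------------------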
t->destroy_buffers(nullptr, nullptr); WaitStatus exit_status = WaitStatus::for_fatal_sig(sig); record_exit_trace_event(t, exit_status); // Allow writing child_tid now because otherwise the write will race t->record_exit_event(RecordTask::WRITE_CHILD_TID); // On a real affected kernel, we probably would have never gotten here, // since the signal we would be seeing was not deterministic, but let's // be conservative and still try to emulate the ptrace stop. t->do_ptrace_exit_stop(exit_status); t->did_kill(); t->detach(); // Not really, but we detached, so we're never gonna see that event // anyway, so just pretend we're there already t->did_reach_zombie(); return true; } // Only inject fatal signals. Non-fatal signals with signal handlers // were taken care of above; for non-fatal signals without signal // handlers, there is no need to deliver the signal at all. In fact, // there is really no way to inject a non-fatal, non-handled signal // without letting the task execute at least one instruction, which // we don't want to do here. bool inject_signal = is_fatal && sig != get_continue_through_sig(); if (inject_signal) { preinject_signal(t); t->resume_execution(RESUME_CONT, RESUME_NONBLOCKING, RESUME_NO_TICKS, sig); } t->signal_delivered(sig); if (!inject_signal || !is_coredumping_signal(sig)) { /* Fatal signals may core-dump, so we don't consider the signal * delivery complete until we've actually managed to advance past that */ t->pop_signal_delivery(); } // Mark each task in this address space as expecting a ptrace exit // to avoid causing any ptrace_exit races. if (is_fatal && is_coredumping_signal(sig)) { for (Task *ot : t->vm()->task_set()) { if (t != ot) { if (t->tgid() == ot->tgid() || coredumping_signal_takes_down_entire_vm()) { ((RecordTask *)ot)->waiting_for_ptrace_exit = true; } } } } last_task_switchable = can_switch; step_state->continue_type = DONT_CONTINUE; break; } default: FATAL() << "Unhandled signal state " << t->ev().type(); break; } return false; } bool RecordSession::handle_signal_event(RecordTask* t, StepState* step_state) { int sig = t->stop_sig(); if (!sig) { return false; } if (!done_initial_exec()) { // If the initial tracee isn't prepared to handle // signals yet, then us ignoring the ptrace // notification here will have the side effect of // declining to deliver the signal. // // This doesn't really occur in practice, only in // tests that force a degenerately low time slice. LOG(warn) << "Dropping " << signal_name(sig) << " because it can't be delivered yet"; // These signals might have effects on the sigmask. t->invalidate_sigmask(); // No events to be recorded, so no syscallbuf updates // needed. return true; } if (sig == SIGTRAP && handle_syscallbuf_breakpoint(t)) { return true; } SignalDeterministic deterministic = is_deterministic_signal(t); // The kernel might have forcibly unblocked the signal. Check whether it // was blocked now, before we update our cached sigmask. SignalBlocked signal_was_blocked = t->is_sig_blocked(sig) ? SIG_BLOCKED : SIG_UNBLOCKED; if (deterministic || sig == t->session().syscallbuf_desched_sig()) { // Don't stash these signals; deliver them immediately. // We don't want them to be reordered around other signals. // invalidate_sigmask() must not be called before we reach handle_signal! siginfo_t siginfo = t->get_siginfo(); switch (handle_signal(t, &siginfo, deterministic, signal_was_blocked)) { case SIGNAL_PTRACE_STOP: // Emulated ptrace-stop. Don't run the task again yet. 
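// ---- editor's sketch (illustration only, not part of rr) -----------------
// The is_coredumping_signal() predicate used above corresponds to the set of
// signals whose default action is "dump core" per signal(7). A sketch under
// that assumption:
#include <csignal>

static bool is_coredumping_signal_sketch(int sig) {
  switch (sig) {
    case SIGABRT: case SIGBUS:  case SIGFPE:  case SIGILL:  case SIGQUIT:
    case SIGSEGV: case SIGSYS:  case SIGTRAP: case SIGXCPU: case SIGXFSZ:
      return true;
    default:
      return false;
  }
}
// ---- end editor's sketch --------------------------------------------------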
last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; return true; case DEFER_SIGNAL: ASSERT(t, false) << "Can't defer deterministic or internal signal " << siginfo << " at ip " << t->ip(); break; case SIGNAL_HANDLED: if (t->ptrace_event() == PTRACE_EVENT_SECCOMP) { // `handle_desched_event` detected a spurious desched followed // by a SECCOMP event, which it left pending. Handle that SECCOMP // event now. bool dummy_did_enter_syscall; handle_ptrace_event(&t, step_state, nullptr, &dummy_did_enter_syscall); ASSERT(t, !dummy_did_enter_syscall); } if (t->ptrace_event() == PTRACE_EVENT_EXIT) { // Tracee was nuked (probably SIGKILL) during desched processing. return true; } break; } return false; } // Conservatively invalidate the sigmask in case just accepting a signal has // sigmask effects. t->invalidate_sigmask(); if (sig == PerfCounters::TIME_SLICE_SIGNAL) { if (t->next_pmc_interrupt_is_for_user) { auto vpmc = VirtualPerfCounterMonitor::interrupting_virtual_pmc_for_task(t); ASSERT(t, vpmc); // Synthesize the requested signal. vpmc->synthesize_signal(t); t->next_pmc_interrupt_is_for_user = false; return true; } auto& si = t->get_siginfo(); /* This implementation will of course fall over if rr tries to * record itself. * * NB: we can't check that the ticks is >= the programmed * target, because this signal may have become pending before * we reset the HPC counters. There be a way to handle that * more elegantly, but bridge will be crossed in due time. * * We can't check that the fd matches t->hpc.ticks_fd() because this * signal could have been queued quite a long time ago and the PerfCounters * might have been stopped (and restarted!), perhaps even more than once, * since the signal was queued. possibly changing its fd. We could check * against all fds the PerfCounters have ever used, but that seems like * overkill. */ ASSERT(t, PerfCounters::TIME_SLICE_SIGNAL == si.si_signo && (RecordTask::SYNTHETIC_TIME_SLICE_SI_CODE == si.si_code || POLL_IN == si.si_code)) << "Tracee is using SIGSTKFLT??? (code=" << si.si_code << ", fd=" << si.si_fd << ")"; } t->stash_sig(); return true; } template static bool is_ptrace_any_sysemu_arch(int command) { return command >= 0 && (command == Arch::PTRACE_SYSEMU || command == Arch::PTRACE_SYSEMU_SINGLESTEP); } static bool is_ptrace_any_sysemu(SupportedArch arch, int command) { RR_ARCH_FUNCTION(is_ptrace_any_sysemu_arch, arch, command); } bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state, RecordResult* step_result, SupportedArch syscall_arch) { if (const RecordTask::StashedSignal* sig = t->stashed_sig_not_synthetic_SIGCHLD()) { // The only four cases where we allow a stashed signal to be pending on // syscall entry are: // -- the signal is a ptrace-related signal, in which case if it's generated // during a blocking syscall, it does not interrupt the syscall // -- rrcall_notify_syscall_hook_exit, which is effectively a noop and // lets us dispatch signals afterward // -- when we're entering a blocking untraced syscall. If it really blocks, // we'll get the desched-signal notification and dispatch our stashed // signal. // -- when we're doing a privileged syscall that's internal to the preload // logic // We do not generally want to have stashed signals pending when we enter // a syscall, because that will execute with a hacked signal mask // (see RecordTask::will_resume_execution) which could make things go wrong. 
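// ---- editor's sketch (illustration only, not part of rr) -----------------
// Background for the POLL_IN/si_fd check above: a perf counter can be
// configured so that overflow delivers a queued signal carrying the fd.
// A sketch of that wiring; the event choice and period are placeholders
// (rr uses a CPU-model-specific "ticks" event).
#define _GNU_SOURCE 1 // for F_SETSIG / F_SETOWN_EX
#include <fcntl.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstring>

static int ticks_counter_with_signal(pid_t tid, int signo) {
  struct perf_event_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.size = sizeof(attr);
  attr.type = PERF_TYPE_HARDWARE;
  attr.config = PERF_COUNT_HW_INSTRUCTIONS; // placeholder event
  attr.sample_period = 1000000;             // interrupt after this many counts
  attr.exclude_kernel = 1;
  int fd = syscall(SYS_perf_event_open, &attr, tid, -1, -1, 0);
  if (fd < 0) {
    return -1;
  }
  struct f_owner_ex own = { F_OWNER_TID, tid };
  fcntl(fd, F_SETOWN_EX, &own); // deliver to this thread
  fcntl(fd, F_SETFL, O_ASYNC);  // signal on counter overflow
  fcntl(fd, F_SETSIG, signo);   // with si_code POLL_IN and si_fd set
  return fd;
}
// ---- end editor's sketch --------------------------------------------------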
ASSERT(t, t->desched_rec() || is_rrcall_notify_syscall_hook_exit_syscall( t->regs().original_syscallno(), t->arch()) || t->ip() == t->vm() ->privileged_traced_syscall_ip() .increment_by_syscall_insn_length(t->arch())) << "Stashed signal pending on syscall entry when it shouldn't be: " << sig->siginfo << "; regs=" << t->regs() << "; last_execution_resume=" << t->last_execution_resume() << "; sig ip=" << sig->ip; } // We just entered a syscall. if (!maybe_restart_syscall(t)) { if (syscall_seccomp_ordering_ == PTRACE_SYSCALL_BEFORE_SECCOMP_UNKNOWN && t->seccomp_bpf_enabled) { // We received a PTRACE_SYSCALL notification before the seccomp // notification. Ignore it and continue to the seccomp notification. syscall_seccomp_ordering_ = PTRACE_SYSCALL_BEFORE_SECCOMP; step_state->continue_type = CONTINUE; return true; } // Don't ever patch a sigreturn syscall. These can't go through the syscallbuf. if (!is_sigreturn(t->regs().original_syscallno(), t->arch())) { if (t->vm()->monkeypatcher().try_patch_syscall(t)) { // Syscall was patched. Emit event and continue execution. t->record_event(Event::patch_syscall()); return true; } if (!t->is_stopped()) { // task exited while we were trying to patch it. // Make sure that this exit event gets processed step_state->continue_type = DONT_CONTINUE; return false; } } t->push_event(SyscallEvent(t->regs().original_syscallno(), syscall_arch)); } check_initial_task_syscalls(t, step_result); note_entering_syscall(t); if ((t->emulated_ptrace_cont_command == PTRACE_SYSCALL || is_ptrace_any_sysemu(t->arch(), t->emulated_ptrace_cont_command)) && !is_in_privileged_syscall(t)) { t->ev().Syscall().state = ENTERING_SYSCALL_PTRACE; t->emulate_ptrace_stop(WaitStatus::for_syscall(t), SYSCALL_ENTRY_STOP); t->record_current_event(); t->ev().Syscall().in_sysemu = is_ptrace_any_sysemu(t->arch(), t->emulated_ptrace_cont_command); } return true; } /** * The execution of |t| has just been resumed, and it most likely has * a new event that needs to be processed. Prepare that new event. 
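// ---- editor's sketch (illustration only, not part of rr) -----------------
// At a syscall-entry stop like the ones handled by process_syscall_entry()
// above, the pending syscall number can be read from the tracee's registers;
// on x86-64 it lives in orig_rax (rax itself already holds -ENOSYS by then).
// Hypothetical helper, x86-64 only:
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/types.h>

#if defined(__x86_64__)
static long syscall_number_at_entry(pid_t tid) {
  struct user_regs_struct regs;
  if (ptrace(PTRACE_GETREGS, tid, nullptr, &regs) < 0) {
    return -1;
  }
  return regs.orig_rax;
}
#endif
// ---- end editor's sketch --------------------------------------------------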
* Returns false if the task exits during processing */ void RecordSession::runnable_state_changed(RecordTask* t, StepState* step_state, RecordResult* step_result, bool can_consume_wait_status) { switch (t->ev().type()) { case EV_NOOP: t->pop_noop(); return; case EV_INSTRUCTION_TRAP: t->record_current_event(); t->pop_event(t->ev().type()); return; case EV_SENTINEL: case EV_SIGNAL_HANDLER: case EV_SYSCALL_INTERRUPTION: { if (!can_consume_wait_status) { return; } SupportedArch syscall_arch = t->detect_syscall_arch(); t->canonicalize_regs(syscall_arch); t->apply_syscall_entry_regs(); process_syscall_entry(t, step_state, step_result, syscall_arch); return; } default: return; } } bool RecordSession::prepare_to_inject_signal(RecordTask* t, StepState* step_state) { if (!done_initial_exec() || step_state->continue_type != CONTINUE) { return false; } union { NativeArch::siginfo_t native_api; siginfo_t linux_api; } si; const RecordTask::StashedSignal* sig; while (true) { sig = t->peek_stashed_sig_to_deliver(); if (!sig) { return false; } si.linux_api = sig->siginfo; if (si.linux_api.si_signo == get_ignore_sig()) { LOG(debug) << "Declining to deliver " << signal_name(si.linux_api.si_signo) << " by user request"; t->pop_stash_sig(sig); t->stashed_signal_processed(); } else { break; } } if (sig->deterministic == DETERMINISTIC_SIG && sig->siginfo.si_signo == SIGSYS && t->is_sig_blocked(sig->siginfo.si_signo) == SIG_BLOCKED) { // Our synthesized deterministic SIGSYS (seccomp trap) needs to match the // kernel behavior of unblocking the signal and resetting disposition to // default. t->unblock_signal(SIGSYS); t->set_sig_handler_default(SIGSYS); } switch (handle_signal(t, &si.linux_api, sig->deterministic, SIG_UNBLOCKED)) { case SIGNAL_PTRACE_STOP: // Emulated ptrace-stop. Don't run the task again yet. last_task_switchable = ALLOW_SWITCH; LOG(debug) << signal_name(si.linux_api.si_signo) << ", emulating ptrace stop"; break; case DEFER_SIGNAL: LOG(debug) << signal_name(si.linux_api.si_signo) << " deferred"; // Leave signal on the stack and continue task execution. We'll try again // later. return false; case SIGNAL_HANDLED: LOG(debug) << signal_name(si.linux_api.si_signo) << " handled"; // Signal is now a pending event on |t|'s event stack if (t->ev().type() == EV_SCHED) { if (t->maybe_in_spinlock()) { LOG(debug) << "Detected possible spinlock, forcing one round-robin"; scheduler().schedule_one_round_robin(t); } // Allow switching after a SCHED. We'll flush the SCHED if and only // if we really do a switch. last_task_switchable = ALLOW_SWITCH; } break; } step_state->continue_type = DONT_CONTINUE; t->pop_stash_sig(sig); if (t->ev().type() != EV_SIGNAL) { t->stashed_signal_processed(); } return true; } static void inject_ld_helper_library(vector& env, string env_var, string value) { // Our preload lib should come first if possible, because that will speed up // the loading of the other libraries; it's also a good idea to put our audit // library at the head of the list, since there's only sixteen possible link // namespaces on glibc and each audit library uses up one. // // We supply a placeholder which is then mutated to the correct filename in // Monkeypatcher::patch_after_exec. auto env_assignment = env_var + "="; auto it = env.begin(); for (; it != env.end(); ++it) { if (it->find(env_assignment) != 0) { continue; } // Honor old preloads too. This may cause // problems, but only in those libs, and // that's the user's problem. 
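// ---- editor's sketch (illustration only, not part of rr) -----------------
// Context for the "synthesized deterministic SIGSYS (seccomp trap)" case
// above: a seccomp filter returning SECCOMP_RET_TRAP makes the kernel raise
// a synchronous SIGSYS on matching syscalls. A minimal filter install,
// assuming a syscall number 'nr' to trap:
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <stddef.h>

static int trap_syscall(int nr) {
  struct sock_filter filter[] = {
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (unsigned)nr, 0, 1),
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),  // -> deterministic SIGSYS
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
  };
  struct sock_fprog prog = {
    (unsigned short)(sizeof(filter) / sizeof(filter[0])), filter
  };
  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
    return -1;
  }
  return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}
// ---- end editor's sketch --------------------------------------------------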
value += ":"; value += it->substr(it->find("=") + 1); break; } value = env_assignment + value; if (it == env.end()) { env.push_back(value); } else { *it = value; } } void strip_outer_ld_preload(vector& env) { auto env_assignment = "LD_PRELOAD="; auto it = env.begin(); for (; it != env.end(); ++it) { if (it->find(env_assignment) != 0) { continue; } size_t colon_pos = it->find(":"); if (colon_pos != string::npos) { // If the preload library is loaded at all, it must be first size_t preload_pos = it->find("librrpreload"); if (preload_pos < colon_pos) { string new_ld_preload = it->substr(++colon_pos); *it = env_assignment + new_ld_preload; return; } else { DEBUG_ASSERT(preload_pos == string::npos); } } } } static const MemoryRange asan_shadow(remote_ptr((uintptr_t)0x00007fff7000LL), remote_ptr((uintptr_t)0x10007fff8000LL)); static const MemoryRange asan_allocator_reserved(remote_ptr((uintptr_t)0x600000000000LL), remote_ptr((uintptr_t)0x640000002000LL)); // See https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp static const MemoryRange tsan_shadow(remote_ptr((uintptr_t)0x008000000000LL), remote_ptr((uintptr_t)0x550000000000LL)); // The memory area 0x7b0000000000-0x7c0000002000 is reserved for TSAN's // custom heap allocator --- applications end up using it, but *we* can't use // it. static const MemoryRange tsan_exclude(remote_ptr((uintptr_t)0x568000000000LL), remote_ptr((uintptr_t)0x7e8000000000LL)); // It's only 1TB because tsan can't fit more static const MemoryRange tsan_fixed_global_exclusion_range(remote_ptr((uintptr_t)0x7e8000000000LL), remote_ptr((uintptr_t)0x7f8000000000LL)); struct ExeInfo { ExeInfo() : arch(NativeArch::arch()) {} SupportedArch arch; // Empty if anything fails string sanitizer_path; vector sanitizer_exclude_memory_ranges; // If non-empty, use this as the global exclusion range. MemoryRange fixed_global_exclusion_range; void setup_asan_memory_ranges() { if (!check_sanitizer_arch()) { return; } sanitizer_exclude_memory_ranges.push_back(asan_shadow); sanitizer_exclude_memory_ranges.push_back(asan_allocator_reserved); } void setup_tsan_memory_ranges() { if (!check_sanitizer_arch()) { return; } sanitizer_exclude_memory_ranges.push_back(tsan_shadow); sanitizer_exclude_memory_ranges.push_back(tsan_exclude); fixed_global_exclusion_range = tsan_fixed_global_exclusion_range; } private: bool check_sanitizer_arch() { switch (arch) { case x86_64: return true; default: // We have no idea what's going on. Disable mmap randomization if // chaos mode is active. 
sanitizer_exclude_memory_ranges.push_back(MemoryRange::all()); return false; } } }; static ExeInfo read_exe_info(const string& exe_file) { ExeInfo ret; ScopedFd fd(exe_file.c_str(), O_RDONLY); if (!fd.is_open()) { return ret; } ElfFileReader reader(fd); ret.arch = reader.arch(); DynamicSection dynamic = reader.read_dynamic(); for (auto& entry : dynamic.entries) { if (entry.tag == DT_NEEDED && entry.val < dynamic.strtab.size()) { const char* name = &dynamic.strtab[entry.val]; if (!strncmp(name, "libasan", 7)) { ret.sanitizer_path = string(name); ret.setup_asan_memory_ranges(); } else if (!strncmp(name, "libtsan", 7)) { ret.sanitizer_path = string(name); ret.setup_tsan_memory_ranges(); } } } auto syms = reader.read_symbols(".dynsym", ".dynstr"); for (size_t i = 0; i < syms.size(); ++i) { if (syms.is_name(i, "__asan_init")) { ret.setup_asan_memory_ranges(); } else if (syms.is_name(i, "__tsan_init")) { ret.setup_tsan_memory_ranges(); } } return ret; } static string lookup_by_path(const string& name) { if (name.find('/') != string::npos) { return name; } const char* env = getenv("PATH"); if (!env) { return name; } char* p = strdup(env); char* s = p; while (*s) { char* next = strchr(s, ':'); if (next) { *next = 0; } string file = string(s) + "/" + name; struct stat st; if (!stat(file.c_str(), &st) && S_ISREG(st.st_mode) && !access(file.c_str(), X_OK)) { free(p); return file; } if (!next) { break; } s = next + 1; } free(p); return name; } /*static*/ RecordSession::shr_ptr RecordSession::create( const vector& argv, const vector& extra_env, const DisableCPUIDFeatures& disable_cpuid_features, SyscallBuffering syscallbuf, unsigned char syscallbuf_desched_sig, BindCPU bind_cpu, const string& output_trace_dir, const TraceUuid* trace_id, bool use_audit, bool unmap_vdso, bool force_asan_active, bool force_tsan_active) { TraceeAttentionSet::initialize(); // The syscallbuf library interposes some critical // external symbols like XShmQueryExtension(), so we // preload it whether or not syscallbuf is enabled. Indicate here whether // syscallbuf is enabled. 
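// ---- editor's sketch (illustration only, not part of rr) -----------------
// The same PATH search as lookup_by_path() above, expressed with std::string
// instead of strdup/strchr. Behavior note: empty PATH components
// (historically meaning ".") are simply skipped in this sketch.
#include <cstdlib>
#include <sstream>
#include <string>
#include <sys/stat.h>
#include <unistd.h>

static std::string lookup_by_path_sketch(const std::string& name) {
  if (name.find('/') != std::string::npos) {
    return name;
  }
  const char* env = getenv("PATH");
  if (!env) {
    return name;
  }
  std::stringstream dirs(env);
  std::string dir;
  while (std::getline(dirs, dir, ':')) {
    if (dir.empty()) {
      continue;
    }
    std::string file = dir + "/" + name;
    struct stat st;
    if (stat(file.c_str(), &st) == 0 && S_ISREG(st.st_mode) &&
        access(file.c_str(), X_OK) == 0) {
      return file;
    }
  }
  return name;
}
// ---- end editor's sketch --------------------------------------------------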
if (syscallbuf == DISABLE_SYSCALL_BUF) { unsetenv(SYSCALLBUF_ENABLED_ENV_VAR); } else { setenv(SYSCALLBUF_ENABLED_ENV_VAR, "1", 1); if (!has_effective_caps(uint64_t(1) << CAP_SYS_ADMIN) && !has_effective_caps(uint64_t(1) << CAP_PERFMON)) { ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); if (fd.is_open()) { char buf[100]; ssize_t size = read(fd, buf, sizeof(buf) - 1); if (size >= 0) { buf[size] = 0; int val = atoi(buf); if (val > 1) { fprintf(stderr, "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" "Change it to 1, or use 'rr record -n' (slow).\n" "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.d/10-rr.conf.\n" "See 'man 8 sysctl', 'man 5 sysctl.d' (systemd systems)\n" "and 'man 5 sysctl.conf' (non-systemd systems) for more details.\n", val); exit(1); } } } } } vector env = current_env(); // Have extra_env override anything already in the environment for (string extra : extra_env) { string extra_var = extra.substr(0, extra.find('=')); auto it = env.begin(); for (; it != env.end(); ++it) { if (it->find(extra_var) != 0) { continue; } it = env.erase(it); break; } } env.insert(env.end(), extra_env.begin(), extra_env.end()); string full_path = lookup_by_path(argv[0]); struct stat st; if (stat(full_path.c_str(), &st) == 0 && S_ISDIR(st.st_mode)) { CLEAN_FATAL() << "Provided tracee '" << argv[0] << "' is a directory, not an executable"; } ExeInfo exe_info = read_exe_info(full_path); if (exe_info.sanitizer_exclude_memory_ranges.empty()) { if (force_asan_active) { exe_info.setup_asan_memory_ranges(); } else if (force_tsan_active) { exe_info.setup_tsan_memory_ranges(); } } // Strip any LD_PRELOAD that an outer rr may have inserted strip_outer_ld_preload(env); // LD_PRELOAD the syscall interception lib string syscall_buffer_lib_path = find_helper_library(SYSCALLBUF_LIB_FILENAME); if (!syscall_buffer_lib_path.empty()) { string ld_preload = ""; if (!exe_info.sanitizer_path.empty()) { LOG(debug) << "Prepending " << exe_info.sanitizer_path << " to LD_PRELOAD"; // Put an LD_PRELOAD entry for it before our preload library, because // it checks that it's loaded first ld_preload += exe_info.sanitizer_path + ":"; } ld_preload += syscall_buffer_lib_path + SYSCALLBUF_LIB_FILENAME_PADDED; // When librrpreload is built against glibc 2.34 but runs in a process linking pre-2.34 glibc, // its call to dlsym needs to search libdl before libc. When librrpreload found dlsym // in libc at link time, pre-2.34 ld.so throws a fatal error if it searches for dlsym in libc and // can't find it. ld_preload += ":libdl.so.2"; inject_ld_helper_library(env, "LD_PRELOAD", ld_preload); } if (use_audit) { string rtld_audit_lib_path = find_helper_library(RTLDAUDIT_LIB_FILENAME); if (!rtld_audit_lib_path.empty()) { string ld_audit = rtld_audit_lib_path + RTLDAUDIT_LIB_FILENAME_PADDED; inject_ld_helper_library(env, "LD_AUDIT", ld_audit); } } env.push_back("RUNNING_UNDER_RR=1"); // Stop Mesa using the GPU env.push_back("LIBGL_ALWAYS_SOFTWARE=1"); env.push_back("GBM_ALWAYS_SOFTWARE=1"); env.push_back("SDL_RENDER_DRIVER=software"); // Stop sssd from using shared-memory with its daemon env.push_back("SSS_NSS_USE_MEMCACHE=NO"); // Disable Gecko's "wait for gdb to attach on process crash" behavior, since // it is useless when running under rr. env.push_back("MOZ_GDB_SLEEP=0"); // Avoid GVFS using separate processes that might run // outside the recording but share memory mapped files. env.push_back("GIO_USE_VFS=local"); // If we have CPUID faulting, don't use these environment hacks. 
We don't // need them and the user might want to use them themselves for other reasons. if (!Session::has_cpuid_faulting()) { // OpenSSL uses RDRAND, but we can disable it. These bitmasks are inverted // and ANDed with the results of CPUID. The number below is 2^62, which is the // bit for RDRAND support. env.push_back("OPENSSL_ia32cap=~4611686018427387904:0"); // Disable Qt's use of RDRAND/RDSEED/RTM env.push_back("QT_NO_CPU_FEATURE=rdrnd rdseed rtm"); // Disable systemd's use of RDRAND env.push_back("SYSTEMD_RDRAND=0"); } shr_ptr session( new RecordSession(full_path, argv, env, disable_cpuid_features, syscallbuf, syscallbuf_desched_sig, bind_cpu, output_trace_dir, trace_id, use_audit, unmap_vdso)); session->excluded_ranges_ = std::move(exe_info.sanitizer_exclude_memory_ranges); session->fixed_global_exclusion_range_ = std::move(exe_info.fixed_global_exclusion_range); return session; } RecordSession::RecordSession(const std::string& exe_path, const std::vector& argv, const std::vector& envp, const DisableCPUIDFeatures& disable_cpuid_features, SyscallBuffering syscallbuf, int syscallbuf_desched_sig, BindCPU bind_cpu, const string& output_trace_dir, const TraceUuid* trace_id, bool use_audit, bool unmap_vdso) : trace_out(argv[0], output_trace_dir, ticks_semantics_), scheduler_(*this), trace_id(trace_id), disable_cpuid_features_(disable_cpuid_features), ignore_sig(0), continue_through_sig(0), last_task_switchable(PREVENT_SWITCH), syscall_buffer_size_(1024 * 1024), syscallbuf_desched_sig_(syscallbuf_desched_sig), use_syscall_buffer_(syscallbuf == ENABLE_SYSCALL_BUF), use_file_cloning_(true), use_read_cloning_(true), enable_chaos_(false), wait_for_all_(false), use_audit_(use_audit), unmap_vdso_(unmap_vdso) { if (!has_cpuid_faulting() && disable_cpuid_features.any_features_disabled()) { FATAL() << "CPUID faulting required to disable CPUID features"; } if (rr::syscall_number_for_rrcall_init_preload(x86_64) != RR_CALL_BASE) { FATAL() << "RR_CALL_BASE is incorrect"; } trace_out.set_bound_cpu(choose_cpu(bind_cpu, cpu_lock)); do_bind_cpu(); ScopedFd error_fd = create_spawn_task_error_pipe(); RecordTask* t = static_cast( Task::spawn(*this, error_fd, &tracee_socket_fd(), &tracee_socket_receiver_fd(), &tracee_socket_fd_number, exe_path, argv, envp)); if (NativeArch::is_x86ish()) { // CPU affinity has been set. trace_out.setup_cpuid_records(has_cpuid_faulting(), disable_cpuid_features_); if (cpu_has_xsave_fip_fdp_quirk()) { trace_out.set_xsave_fip_fdp_quirk(true); // Clear FIP/FDP on every event to reduce the probability of this quirk // causing divergence, especially when porting traces to Intel machines trace_out.set_clear_fip_fdp(true); } if (cpu_has_fdp_exception_only_quirk()) { trace_out.set_fdp_exception_only_quirk(true); } } initial_thread_group = t->thread_group(); on_create(t); } RecordSession::~RecordSession() { // Do this now while we're still a RecordSession. When Session's // destructor calls kill_all_tasks(), things turn bad. kill_all_tasks(); } RecordSession::RecordResult RecordSession::record_step() { RecordResult result; if (task_map.empty()) { result.status = STEP_EXITED; result.exit_status = initial_thread_group->exit_status; return result; } if (!wait_for_all_ && initial_thread_group->task_set().empty()) { // SIGKILL any tasks we haven't already killed. 
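// ---- editor's sketch (illustration only, not part of rr) -----------------
// Decoding the magic number in OPENSSL_ia32cap above: the first word of that
// vector mirrors CPUID.1:EDX in bits 0-31 and CPUID.1:ECX in bits 32-63.
// RDRAND is ECX bit 30, i.e. vector bit 62, and the leading '~' means
// "clear these bits".
#include <cstdint>

static_assert(4611686018427387904ULL == (uint64_t(1) << 62),
              "OPENSSL_ia32cap=~4611686018427387904:0 masks off RDRAND");
// ---- end editor's sketch --------------------------------------------------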
terminate_tracees(); } result.status = STEP_CONTINUE; TaskUid prev_task_tuid; if (scheduler().current()) { prev_task_tuid = scheduler().current()->tuid(); } auto rescheduled = scheduler().reschedule(last_task_switchable); if (rescheduled.interrupted_by_signal) { // The scheduler was waiting for some task to become active, but was // interrupted by a signal. Yield to our caller now to give the caller // a chance to do something triggered by the signal // (e.g. terminate the recording). return result; } RecordTask* t = scheduler().current(); if (!t) { // No child to schedule. Yield to our caller to give it a chance // to do something (e.g. terminate the recording). return result; } // If the task has been reaped prematurely then it's not running // and we can't get registers etc, so minimize what we do between here // to handle_ptrace_exit_event(). if (t->waiting_for_reap) { // Give it another chance to be reaped t->did_reach_zombie(); return result; } RecordTask* prev_task = find_task(prev_task_tuid); if (prev_task && prev_task->ev().type() == EV_SCHED) { if (prev_task != t) { // We did do a context switch, so record the SCHED event. Otherwise // we'll just discard it. prev_task->record_current_event(); } prev_task->pop_event(EV_SCHED); } // Have to disable context-switching until we know it's safe // to allow switching the context. last_task_switchable = PREVENT_SWITCH; LOG(debug) << "trace time " << t->trace_time() << ": Active task is " << t->tid << ". Events:"; if (IS_LOGGING(debug)) { t->log_pending_events(); } if (handle_ptrace_exit_event(t)) { // t may have been deleted. last_task_switchable = ALLOW_SWITCH; return result; } if (rescheduled.started_new_timeslice) { t->registers_at_start_of_last_timeslice = t->regs(); t->time_at_start_of_last_timeslice = trace_writer().time(); } StepState step_state(CONTINUE); ASSERT(t, t->is_stopped()) << "Somehow we're not stopped here; status=" << t->status(); bool did_enter_syscall; if (rescheduled.by_waitpid && handle_ptrace_event(&t, &step_state, &result, &did_enter_syscall)) { if (result.status != STEP_CONTINUE || step_state.continue_type == DONT_CONTINUE) { last_task_switchable = ALLOW_SWITCH; return result; } if (did_enter_syscall && t->ev().type() == EV_SYSCALL) { syscall_state_changed(t, &step_state); } } else { ASSERT(t, t->is_stopped()) << "handle_ptrace_event left us in a not-stopped state"; if (rescheduled.by_waitpid && handle_signal_event(t, &step_state)) { // Tracee may have exited while processing descheds; handle that. if (handle_ptrace_exit_event(t)) { // t may have been deleted. last_task_switchable = ALLOW_SWITCH; return result; } } else { ASSERT(t, t->is_stopped()) << "handle_signal_event left us in a not-stopped state"; runnable_state_changed(t, &step_state, &result, rescheduled.by_waitpid); if (result.status != STEP_CONTINUE || step_state.continue_type == DONT_CONTINUE) { return result; } switch (t->ev().type()) { case EV_DESCHED: desched_state_changed(t); break; case EV_SYSCALL: syscall_state_changed(t, &step_state); break; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: if (signal_state_changed(t, &step_state)) { // t may have been deleted return result; } break; default: break; } } } t->verify_signal_states(); // We try to inject a signal if there's one pending; otherwise we continue // task execution. if (!prepare_to_inject_signal(t, &step_state) && step_state.continue_type != DONT_CONTINUE) { // Ensure that we aren't allowing switches away from a running task. 
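// ---- editor's sketch (illustration only, not part of rr) -----------------
// The driving loop implied by record_step()'s contract (documented in
// RecordSession.h below): call it until it stops returning STEP_CONTINUE.
// Hypothetical driver; real callers also react to signals between steps,
// and WaitStatus::exit_code() is assumed here.
static int record_until_exit(RecordSession& session) {
  while (true) {
    RecordSession::RecordResult result = session.record_step();
    switch (result.status) {
      case RecordSession::STEP_CONTINUE:
        break; // keep stepping
      case RecordSession::STEP_EXITED:
        return result.exit_status.exit_code();
      case RecordSession::STEP_SPAWN_FAILED:
        return -1; // result.failure_message explains why
    }
  }
}
// ---- end editor's sketch --------------------------------------------------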
// Only tasks blocked in a syscall can be switched away from, otherwise // we have races. ASSERT(t, last_task_switchable == PREVENT_SWITCH || t->may_be_blocked()); debug_exec_state("EXEC_START", t); task_continue(step_state); } return result; } void RecordSession::terminate_tracees() { for (auto& v : task_map) { RecordTask* t = static_cast(v.second); if (!t->detached_proxy && !t->sent_shutdown_kill) { LOG(debug) << "Terminating tracee " << t->tid; ::kill(t->rec_tid, SIGKILL); t->sent_shutdown_kill = true; t->emulate_SIGCONT(); } } } void RecordSession::forward_SIGTERM() { if (!initial_thread_group->task_set().empty()) { kill(initial_thread_group->tgid, SIGTERM); } } void RecordSession::term_detached_tasks() { // Send SIGTERM to all detached child tasks first, so they may clean up // in parallel. for (auto& v : task_map) { RecordTask* t = static_cast(v.second); if (!t->detached_proxy) { continue; } ::kill(t->rec_tid, SIGTERM); } for (auto it = task_map.begin(); it != task_map.end(); ) { RecordTask* t = static_cast(it->second); if (!t->detached_proxy) { ++it; continue; } WaitResult result = WaitManager::wait_exit(WaitOptions(t->rec_tid)); if (result.code != WAIT_OK) { LOG(warn) << "Wait failed"; } else if (result.status.type() != WaitStatus::EXIT) { LOG(warn) << "Unexpected wait status " << result.status << " while waiting for detached child " << t->rec_tid; } ++it; delete t; } } void RecordSession::close_trace_writer(TraceWriter::CloseStatus status) { trace_out.close(status, trace_id.get()); } Task* RecordSession::new_task(pid_t tid, pid_t, uint32_t serial, SupportedArch a, const std::string&) { return new RecordTask(*this, tid, serial, a); } void RecordSession::on_create(Task* t) { Session::on_create(t); scheduler().on_create(static_cast(t)); } void RecordSession::on_destroy(Task* t) { RecordTask *rt = static_cast(t); scheduler().on_destroy(rt); if (rt->detached_proxy) { detached_task_map.erase(rt->tid); } Session::on_destroy(t); } RecordTask* RecordSession::find_task(pid_t rec_tid) const { return static_cast(Session::find_task(rec_tid)); } RecordTask* RecordSession::find_task(const TaskUid& tuid) const { return static_cast(Session::find_task(tuid)); } RecordTask* RecordSession::find_detached_proxy_task(pid_t proxy_tid) const { auto it = detached_task_map.find(proxy_tid); return detached_task_map.end() != it ? it->second : nullptr; } void RecordSession::on_proxy_detach(RecordTask *t, pid_t new_tid) { Session::on_destroy(t); task_map[new_tid] = t; detached_task_map[t->tid] = t; } uint64_t RecordSession::rr_signal_mask() const { return signal_bit(PerfCounters::TIME_SLICE_SIGNAL) | signal_bit(syscallbuf_desched_sig_); } static const uint32_t CPUID_RDRAND_FLAG = 1 << 30; static const uint32_t CPUID_RTM_FLAG = 1 << 11; static const uint32_t CPUID_RDSEED_FLAG = 1 << 18; static const uint32_t CPUID_XSAVEOPT_FLAG = 1 << 0; void DisableCPUIDFeatures::amend_cpuid_data(uint32_t eax_in, uint32_t ecx_in, CPUIDData* cpuid_data) const { switch (eax_in) { case CPUID_GETFEATURES: cpuid_data->ecx &= ~(CPUID_RDRAND_FLAG | features_ecx); cpuid_data->edx &= ~features_edx; break; case CPUID_GETEXTENDEDFEATURES: if (ecx_in == 0) { cpuid_data->ebx &= ~(CPUID_RDSEED_FLAG | CPUID_RTM_FLAG | extended_features_ebx); cpuid_data->ecx &= ~extended_features_ecx; cpuid_data->edx &= ~extended_features_edx; } break; case CPUID_GETXSAVE: if (ecx_in == 1) { // Always disable XSAVEOPT because it's nondeterministic, // possibly depending on context switching behavior. Intel // recommends not using it from user space. 
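// ---- editor's sketch (illustration only, not part of rr) -----------------
// Assumed convention behind rr_signal_mask() above: bit (sig - 1) of a
// 64-bit word stands for signal 'sig', matching the kernel's sigset layout,
// so signal_bit() is presumably:
#include <cstdint>

static uint64_t signal_bit_sketch(int sig) {
  return uint64_t(1) << (sig - 1);
}
// e.g. the mask bit for SIGPWR (30 on x86-64) is 1 << 29.
// ---- end editor's sketch --------------------------------------------------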
cpuid_data->eax &= ~(CPUID_XSAVEOPT_FLAG | xsave_features_eax); } break; default: break; } } } // namespace rr rr-5.7.0/src/RecordSession.h000066400000000000000000000226311450675474200157120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_RECORD_SESSION_H_ #define RR_RECORD_SESSION_H_ #include #include #include "Scheduler.h" #include "SeccompFilterRewriter.h" #include "Session.h" #include "ThreadGroup.h" #include "TraceFrame.h" #include "WaitStatus.h" namespace rr { class RecordTask; struct DisableCPUIDFeatures { DisableCPUIDFeatures() : features_ecx(0) , features_edx(0) , extended_features_ebx(0) , extended_features_ecx(0) , extended_features_edx(0) , xsave_features_eax(0) {} bool any_features_disabled() const { return features_ecx || features_edx || extended_features_ebx || extended_features_ecx || extended_features_edx || xsave_features_eax; } /** * Includes disabling TSX and other rr-incompatible features */ void amend_cpuid_data(uint32_t eax_in, uint32_t ecx_in, CPUIDData* cpuid_data) const; /* in: EAX=0x01 */ uint32_t features_ecx; uint32_t features_edx; /* in: EAX=0x07 ECX=0 */ uint32_t extended_features_ebx; uint32_t extended_features_ecx; uint32_t extended_features_edx; /* in: EAX=0x0D ECX=1 */ uint32_t xsave_features_eax; }; struct TraceUuid { uint8_t bytes[16]; }; /** Encapsulates additional session state related to recording. */ class RecordSession final : public Session { public: typedef std::shared_ptr shr_ptr; /** * Create a recording session for the initial command line |argv|. */ enum SyscallBuffering { ENABLE_SYSCALL_BUF, DISABLE_SYSCALL_BUF }; static shr_ptr create( const std::vector& argv, const std::vector& extra_env, const DisableCPUIDFeatures& features, SyscallBuffering syscallbuf = ENABLE_SYSCALL_BUF, unsigned char syscallbuf_desched_sig = SIGPWR, BindCPU bind_cpu = BIND_CPU, const std::string& output_trace_dir = "", const TraceUuid* trace_id = nullptr, bool use_audit = false, bool unmap_vdso = false, bool force_asan_active = false, bool force_tsan_active = false); ~RecordSession() override; const DisableCPUIDFeatures& disable_cpuid_features() const { return disable_cpuid_features_; } bool use_syscall_buffer() const { return use_syscall_buffer_; } size_t syscall_buffer_size() const { return syscall_buffer_size_; } unsigned char syscallbuf_desched_sig() const { return syscallbuf_desched_sig_; } bool use_read_cloning() const { return use_read_cloning_; } bool use_file_cloning() const { return use_file_cloning_; } void set_ignore_sig(int sig) { ignore_sig = sig; } int get_ignore_sig() const { return ignore_sig; } void set_continue_through_sig(int sig) { continue_through_sig = sig; } int get_continue_through_sig() const { return continue_through_sig; } // Returns ranges to exclude from chaos mode memory allocation. // Used to exclude ranges used by sanitizers. const std::vector excluded_ranges() const { return excluded_ranges_; } MemoryRange fixed_global_exclusion_range() const { return fixed_global_exclusion_range_; } bool use_audit() const { return use_audit_; } bool unmap_vdso() { return unmap_vdso_; } uint64_t rr_signal_mask() const; enum RecordStatus { // Some execution was recorded. record_step() can be called again. STEP_CONTINUE, // All tracees are dead. record_step() should not be called again. STEP_EXITED, // Spawning the initial tracee failed. An error message will be in // failure_message. 
STEP_SPAWN_FAILED }; struct RecordResult { RecordStatus status; // When status == STEP_EXITED WaitStatus exit_status; // When status == STEP_SPAWN_FAILED std::string failure_message; }; /** * Record some tracee execution. * This may block. If blocking is interrupted by a signal, will return * STEP_CONTINUE. * Typically you'd call this in a loop until it returns something other than * STEP_CONTINUE. * Note that when this returns, some tasks may be running (not in a ptrace- * stop). In particular, up to one task may be executing user code and any * number of tasks may be blocked in syscalls. */ RecordResult record_step(); /** * SIGKILL all tracees. */ void terminate_tracees(); /** * Close trace output without flushing syscall buffers or writing * task exit/termination records to the trace. */ void close_trace_writer(TraceWriter::CloseStatus status); virtual RecordSession* as_record() override { return this; } TraceWriter& trace_writer() { return trace_out; } virtual void on_destroy(Task* t) override; Scheduler& scheduler() { return scheduler_; } SeccompFilterRewriter& seccomp_filter_rewriter() { return seccomp_filter_rewriter_; } enum ContinueType { DONT_CONTINUE = 0, CONTINUE, CONTINUE_SYSCALL }; struct StepState { // Continue with this continuation type. ContinueType continue_type; StepState(ContinueType continue_type) : continue_type(continue_type) {} }; void set_enable_chaos(bool enable_chaos) { scheduler().set_enable_chaos(enable_chaos); enable_chaos_ = enable_chaos; trace_out.set_chaos_mode(enable_chaos); } bool enable_chaos() const { return enable_chaos_; } void set_num_cores(int num_cores) { scheduler().set_num_cores(num_cores); } void set_use_read_cloning(bool enable) { use_read_cloning_ = enable; } void set_use_file_cloning(bool enable) { use_file_cloning_ = enable; } void set_syscall_buffer_size(size_t size) { syscall_buffer_size_ = size; } void set_wait_for_all(bool wait_for_all) { this->wait_for_all_ = wait_for_all; } virtual Task* new_task(pid_t tid, pid_t rec_tid, uint32_t serial, SupportedArch a, const std::string& name) override; RecordTask* find_task(pid_t rec_tid) const; RecordTask* find_task(const TaskUid& tuid) const; RecordTask* find_detached_proxy_task(pid_t proxy_tid) const; void on_proxy_detach(RecordTask *t, pid_t new_tid); /** * This gets called when we detect that a task has been revived from the * dead with a PTRACE_EVENT_EXEC. See ptrace man page under "execve(2) under * ptrace" for the horrid details. * * The task in the thread-group that triggered the successful execve has changed * its tid to |rec_tid|. We mirror that, and emit TraceTaskEvents to make it * look like a new task was spawned and the old task exited. */ RecordTask* revive_task_for_exec(pid_t rec_tid); virtual TraceStream* trace_stream() override { return &trace_out; } /** * Send SIGTERM to all detached tasks and wait for them to finish. 
*/ void term_detached_tasks(); /** * Forward SIGTERM to initial task */ void forward_SIGTERM(); private: RecordSession(const std::string& exe_path, const std::vector& argv, const std::vector& envp, const DisableCPUIDFeatures& features, SyscallBuffering syscallbuf, int syscallbuf_desched_sig, BindCPU bind_cpu, const std::string& output_trace_dir, const TraceUuid* trace_id, bool use_audit, bool unmap_vdso); virtual void on_create(Task* t) override; void handle_seccomp_traced_syscall(RecordTask* t, RecordSession::StepState* step_state, RecordResult* result, bool* did_enter_syscall); // Returns false if the task exits during processing bool process_syscall_entry(RecordTask* t, StepState* step_state, RecordResult* step_result, SupportedArch syscall_arch); void check_initial_task_syscalls(RecordTask* t, RecordResult* step_result); bool handle_ptrace_event(RecordTask** t_ptr, StepState* step_state, RecordResult* result, bool* did_enter_syscall); bool handle_signal_event(RecordTask* t, StepState* step_state); void runnable_state_changed(RecordTask* t, StepState* step_state, RecordResult* step_result, bool can_consume_wait_status); bool signal_state_changed(RecordTask* t, StepState* step_state); void syscall_state_changed(RecordTask* t, StepState* step_state); void desched_state_changed(RecordTask* t); bool prepare_to_inject_signal(RecordTask* t, StepState* step_state); void task_continue(const StepState& step_state); TraceWriter trace_out; Scheduler scheduler_; ThreadGroup::shr_ptr initial_thread_group; SeccompFilterRewriter seccomp_filter_rewriter_; std::unique_ptr trace_id; DisableCPUIDFeatures disable_cpuid_features_; int ignore_sig; int continue_through_sig; Switchable last_task_switchable; size_t syscall_buffer_size_; unsigned char syscallbuf_desched_sig_; bool use_syscall_buffer_; bool use_file_cloning_; bool use_read_cloning_; /** * When true, try to increase the probability of finding bugs. */ bool enable_chaos_; /** * When true, wait for all tracees to exit before finishing recording. */ bool wait_for_all_; std::vector excluded_ranges_; MemoryRange fixed_global_exclusion_range_; /** * Keeps track of detached tasks. */ std::map detached_task_map; std::string output_trace_dir; bool use_audit_; bool unmap_vdso_; }; } // namespace rr #endif // RR_RECORD_SESSION_H_ rr-5.7.0/src/RecordTask.cc000066400000000000000000002330751450675474200153350ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "RecordTask.h" #include #include #include #include #include #include #include #include "AutoRemoteSyscalls.h" #include "PreserveFileMonitor.h" #include "RecordSession.h" #include "WaitManager.h" #include "core.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "record_signal.h" #include "rr/rr.h" #include "util.h" using namespace std; namespace rr { /** * Stores the table of signal dispositions and metadata for an * arbitrary set of tasks. Each of those tasks must own one of * the |refcount|s while they still refer to this. 
*/ struct Sighandler { Sighandler() : resethand(false), takes_siginfo(false) {} template void init_arch(const typename Arch::kernel_sigaction& ksa) { k_sa_handler = ksa.k_sa_handler; sa.resize(sizeof(ksa)); memcpy(sa.data(), &ksa, sizeof(ksa)); resethand = (ksa.sa_flags & SA_RESETHAND) != 0; takes_siginfo = (ksa.sa_flags & SA_SIGINFO) != 0; } template void reset_arch() { typename Arch::kernel_sigaction ksa; memset(&ksa, 0, sizeof(ksa)); DEBUG_ASSERT(uintptr_t(SIG_DFL) == 0); init_arch(ksa); } SignalDisposition disposition() const { DEBUG_ASSERT(uintptr_t(SIG_DFL) == 0); DEBUG_ASSERT(uintptr_t(SIG_IGN) == 1); switch (k_sa_handler.as_int()) { case 0: return SIGNAL_DEFAULT; case 1: return SIGNAL_IGNORE; default: return SIGNAL_HANDLER; } } remote_code_ptr get_user_handler() const { return disposition() == SIGNAL_HANDLER ? remote_code_ptr(k_sa_handler.as_int()) : remote_code_ptr(); } remote_ptr k_sa_handler; // Saved kernel_sigaction; used to restore handler vector sa; bool resethand; bool takes_siginfo; }; static void reset_handler(Sighandler* handler, SupportedArch arch) { RR_ARCH_FUNCTION(handler->reset_arch, arch); } struct Sighandlers { typedef shared_ptr shr_ptr; shr_ptr clone() const { shr_ptr s(new Sighandlers()); // NB: depends on the fact that Sighandler is for all // intents and purposes a POD type, though not // technically. for (size_t i = 0; i < array_length(handlers); ++i) { s->handlers[i] = handlers[i]; } return s; } Sighandler& get(int sig) { assert_valid(sig); return handlers[sig]; } const Sighandler& get(int sig) const { assert_valid(sig); return handlers[sig]; } void init_from_current_process() { for (size_t i = 1; i < array_length(handlers); ++i) { Sighandler& h = handlers[i]; NativeArch::kernel_sigaction sa; if (::syscall(SYS_rt_sigaction, i, nullptr, &sa, sizeof(uint64_t))) { /* EINVAL means we're querying an * unused signal number. */ DEBUG_ASSERT(EINVAL == errno); continue; } msan_unpoison(&sa, sizeof(NativeArch::kernel_sigaction)); h.init_arch(sa); } } /** * For each signal in |table| such that is_user_handler() is * true, reset the disposition of that signal to SIG_DFL, and * clear the resethand flag if it's set. SIG_IGN signals are * not modified. * * (After an exec() call copies the original sighandler table, * this is the operation required by POSIX to initialize that * table copy.) */ void reset_user_handlers(SupportedArch arch) { for (int i = 0; i < ssize_t(array_length(handlers)); ++i) { Sighandler& h = handlers[i]; // If the handler was a user handler, reset to // default. If it was SIG_IGN or SIG_DFL, // leave it alone. 
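// ---- editor's sketch (illustration only, not part of rr) -----------------
// The disposition classification above, done the portable way with
// sigaction(2) (init_from_current_process uses raw SYS_rt_sigaction so it
// sees the kernel's unwrapped struct). Hypothetical helper:
#include <csignal>

enum class DispositionSketch { Default, Ignore, Handler };

static DispositionSketch query_disposition(int sig) {
  struct sigaction sa;
  if (sigaction(sig, nullptr, &sa) != 0) {
    return DispositionSketch::Default; // e.g. invalid signal number
  }
  // sa_handler and sa_sigaction overlap; the pointer value is what counts.
  if (sa.sa_handler == SIG_DFL) return DispositionSketch::Default;
  if (sa.sa_handler == SIG_IGN) return DispositionSketch::Ignore;
  return DispositionSketch::Handler;
}
// ---- end editor's sketch --------------------------------------------------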
if (h.disposition() == SIGNAL_HANDLER) { reset_handler(&h, arch); } } } void assert_valid(int sig) const { DEBUG_ASSERT(0 < sig && sig < ssize_t(array_length(handlers))); } static shr_ptr create() { return shr_ptr(new Sighandlers()); } Sighandler handlers[_NSIG]; private: Sighandlers() {} Sighandlers(const Sighandlers&); Sighandlers operator=(const Sighandlers&); }; RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial, SupportedArch a) : Task(session, _tid, _tid, serial, a), scheduler_token(0), ticks_at_last_recorded_syscall_exit(0), ip_at_last_recorded_syscall_exit(nullptr), time_at_start_of_last_timeslice(0), priority(0), in_round_robin_queue(false), stable_exit(false), detached_proxy(false), emulated_ptracer(nullptr), emulated_ptrace_event_msg(0), emulated_ptrace_options(0), emulated_ptrace_cont_command(0), emulated_stop_pending(false), emulated_ptrace_SIGCHLD_pending(false), emulated_SIGCHLD_pending(false), emulated_ptrace_seized(false), in_wait_type(WAIT_TYPE_NONE), in_wait_pid(0), emulated_stop_type(NOT_STOPPED), blocked_sigs_dirty(true), syscallbuf_blocked_sigs_generation(0), flushed_num_rec_bytes(0), flushed_syscallbuf(false), delay_syscallbuf_reset_for_desched(false), delay_syscallbuf_reset_for_seccomp_trap(false), prctl_seccomp_status(0), robust_futex_list_len(0), termination_signal(0), tsc_mode(PR_TSC_ENABLE), cpuid_mode(1), stashed_signals_blocking_more_signals(false), stashed_group_stop(false), break_at_syscallbuf_traced_syscalls(false), break_at_syscallbuf_untraced_syscalls(false), break_at_syscallbuf_final_instruction(false), syscallstub_exit_breakpoint(), next_pmc_interrupt_is_for_user(false), did_record_robust_futex_changes(false), waiting_for_reap(false), waiting_for_ptrace_exit(false), retry_syscall_patching(false), sent_shutdown_kill(false), did_execveat(false), tick_request_override((TicksRequest)0), schedule_frozen(false) { push_event(Event::sentinel()); if (session.tasks().empty()) { // Initial tracee. It inherited its state from this process, so set it up. // The very first task we fork inherits the signal // dispositions of the current OS process (which should all be // default at this point, but ...). From there on, new tasks // will transitively inherit from this first task. auto sh = Sighandlers::create(); sh->init_from_current_process(); sighandlers.swap(sh); own_namespace_rec_tid = _tid; } } RecordTask::~RecordTask() { if (emulated_ptracer) { emulated_ptracer->emulated_ptrace_tracees.erase(this); if (emulated_ptrace_options & PTRACE_O_TRACEEXIT) { ASSERT(this, stable_exit) << "PTRACE_O_TRACEEXIT only supported for stable exits for now"; } } for (RecordTask* t : emulated_ptrace_tracees) { // XXX emulate PTRACE_O_EXITKILL ASSERT(this, t->emulated_ptracer == this); t->emulated_ptracer = nullptr; t->emulated_ptrace_options = 0; t->emulated_stop_pending = false; t->emulated_stop_type = NOT_STOPPED; } // We expect tasks to usually exit by a call to exit() or // exit_group(), so it's not helpful to warn about that. if (EV_SENTINEL != ev().type() && (pending_events.size() > 2 || !(ev().type() == EV_SYSCALL && (is_exit_syscall(ev().Syscall().number, ev().Syscall().regs.arch()) || is_exit_group_syscall(ev().Syscall().number, ev().Syscall().regs.arch()))))) { LOG(info) << tid << " still has pending events. From top down:"; log_pending_events(); } if (detached_proxy) { // Kill it so that proceed_to_exit will reach PTRACE_EVENT_EXIT. ::kill(tid, SIGKILL); // We kept the zombie of the original task around to prevent its pid from // being re-used. 
Reap that now. proceed_to_exit(); if (!was_reaped() && may_reap()) { reap(); } did_kill(); } // If this was stopped, notify the scheduler. set_stopped(false); } void RecordTask::record_exit_event(WriteChildTid write_child_tid) { // The kernel explicitly only clears the futex if the address space is shared. // If the address space has no other users then the futex will not be cleared // even if it lives in shared memory which other tasks can read. if (!tid_futex.is_null() && as->task_set().size() > 1 && as->has_mapping(tid_futex)) { int val = 0; record_local(tid_futex, &val); if (write_child_tid == WRITE_CHILD_TID) { // Write the memory now, otherwise the kernel will write it later and that can // race with the execution of other threads if we don't wait for this // thread to fully exit. // This could fail since the address space might have gone away/been switched // by execve. bool ok = true; write_mem(tid_futex, 0, &ok); // The kernel will do an unconditional futex wake on that location so we don't // need to do it. } } // Write the exit event here so that the value recorded above is captured. // Don't flush syscallbuf. Whatever triggered the exit (syscall, signal) // should already have flushed it, if it was running. If it was blocked, // then the syscallbuf would already have been flushed too. Trying to flush // syscallbuf for an exiting task could be bad, // e.g. it could be in the middle of syscallbuf code that's supposed to be // atomic. For the same reasons don't allow syscallbuf to be reset here. record_event(Event::exit(), DONT_FLUSH_SYSCALLBUF, DONT_RESET_SYSCALLBUF); } RecordSession& RecordTask::session() const { return *Task::session().as_record(); } TraceWriter& RecordTask::trace_writer() const { return session().trace_writer(); } Task* RecordTask::clone(CloneReason reason, int flags, remote_ptr stack, remote_ptr tls, remote_ptr cleartid_addr, pid_t new_tid, pid_t new_rec_tid, uint32_t new_serial, Session* other_session, FdTable::shr_ptr new_fds, ThreadGroup::shr_ptr new_tg) { ASSERT(this, reason == Task::TRACEE_CLONE); ASSERT(this, !new_fds); ASSERT(this, !new_tg); Task* t = Task::clone(reason, flags, stack, tls, cleartid_addr, new_tid, new_rec_tid, new_serial, other_session, new_fds, new_tg); if (t->session().is_recording()) { RecordTask* rt = static_cast(t); if (CLONE_CLEARTID & flags) { LOG(debug) << "cleartid futex is " << cleartid_addr; ASSERT(this, !cleartid_addr.is_null()); rt->tid_futex = cleartid_addr; } else { LOG(debug) << "(clone child not enabling CLEARTID)"; } } return t; } void RecordTask::post_wait_clone(Task* cloned_from, int flags) { ASSERT(cloned_from, cloned_from->session().is_recording()); Task::post_wait_clone(cloned_from, flags); RecordTask* rt = static_cast(cloned_from); priority = rt->priority; syscallbuf_code_layout = rt->syscallbuf_code_layout; prctl_seccomp_status = rt->prctl_seccomp_status; robust_futex_list = rt->robust_futex_list; robust_futex_list_len = rt->robust_futex_list_len; tsc_mode = rt->tsc_mode; cpuid_mode = rt->cpuid_mode; if (CLONE_SHARE_SIGHANDLERS & flags) { sighandlers = rt->sighandlers; } else { auto sh = rt->sighandlers->clone(); sighandlers.swap(sh); } update_own_namespace_tid(); } void RecordTask::post_exec() { // Change syscall number to execve/execveat *for the new arch*. If we don't do this, // and the arch changes, then the syscall number for execve in the old arch/ // is treated as the syscall we're executing in the new arch, with hilarious // results. int new_syscallno = did_execveat ? 
      syscall_number_for_execveat(arch()) : syscall_number_for_execve(arch());
  registers.set_original_syscallno(new_syscallno);
  // Fix event architecture and syscall number
  ev().Syscall().number = new_syscallno;
  ev().Syscall().set_arch(arch());

  // The signal mask is inherited across execve so we don't need to invalidate.
  Task::post_exec(exe_path());

  if (emulated_ptracer) {
    ASSERT(this, !(emulated_ptracer->arch() == x86 && arch() == x86_64))
        << "We don't support a 32-bit process tracing a 64-bit process";
  }

  // Clear robust_list state to match kernel state. If this task is cloned
  // soon after exec, we must not do a bogus set_robust_list syscall for
  // the clone.
  set_robust_list(nullptr, 0);
  sighandlers = sighandlers->clone();
  sighandlers->reset_user_handlers(arch());

  // Newly execed tasks always have non-faulting mode (from their point of
  // view, even if rr is secretly causing faults).
  cpuid_mode = 1;
}

template <typename Arch> static void do_preload_init_arch(RecordTask* t) {
  auto params = t->read_mem(
      remote_ptr<rrcall_init_preload_params<Arch>>(t->regs().orig_arg1()));
  t->syscallbuf_code_layout.syscallbuf_syscall_hook =
      params.syscallbuf_syscall_hook.rptr().as_int();
  t->syscallbuf_code_layout.syscallbuf_final_exit_instruction =
      params.syscallbuf_final_exit_instruction.rptr().as_int();
  t->syscallbuf_code_layout.syscallbuf_code_start =
      params.syscallbuf_code_start.rptr().as_int();
  t->syscallbuf_code_layout.syscallbuf_code_end =
      params.syscallbuf_code_end.rptr().as_int();
  t->syscallbuf_code_layout.get_pc_thunks_start =
      params.get_pc_thunks_start.rptr().as_int();
  t->syscallbuf_code_layout.get_pc_thunks_end =
      params.get_pc_thunks_end.rptr().as_int();
  unsigned char in_chaos = t->session().enable_chaos();
  auto in_chaos_ptr = REMOTE_PTR_FIELD(params.globals.rptr(), in_chaos);
  t->write_mem(in_chaos_ptr, in_chaos);
  t->record_local(in_chaos_ptr, &in_chaos);
  auto desched_sig = t->session().syscallbuf_desched_sig();
  auto desched_sig_ptr = REMOTE_PTR_FIELD(params.globals.rptr(), desched_sig);
  t->write_mem(desched_sig_ptr, desched_sig);
  t->record_local(desched_sig_ptr, &desched_sig);
  uint64_t random_seed;
  do {
    random_seed = rand() | (uint64_t(rand()) << 32);
  } while (!random_seed);
  auto random_seed_ptr = REMOTE_PTR_FIELD(params.globals.rptr(), random_seed);
  t->write_mem(random_seed_ptr, random_seed);
  t->record_local(random_seed_ptr, &random_seed);
  auto cpu_binding = t->session().cpu_binding();
  auto cpu_binding_ptr = REMOTE_PTR_FIELD(params.globals.rptr(), cpu_binding);
  t->write_mem(cpu_binding_ptr, cpu_binding);
  t->record_local(cpu_binding_ptr, &cpu_binding);
}

void RecordTask::push_syscall_event(int syscallno) {
  push_event(SyscallEvent(syscallno, detect_syscall_arch()));
}

static void do_preload_init(RecordTask* t) {
  RR_ARCH_FUNCTION(do_preload_init_arch, t->arch(), t);
}

void RecordTask::at_preload_init() {
  Task::at_preload_init();
  do_preload_init(this);
}

/**
 * Avoid using low-numbered file descriptors since that can confuse
 * developers.
 */
static int find_free_file_descriptor(RecordTask* t) {
  int fd;
  int fd_limit;
  struct rlimit limit;
  // Try to determine what the maximum fd is. If we can't, just
  // start the search from 0 and bail out if we hit an absurd
  // number of fds.
  if (prlimit(t->tgid(), RLIMIT_NOFILE, nullptr, &limit) < 0) {
    // We might get EPERM if the tracee has changed UID. If that
    // happens fall back to being slow. (We can probably go faster
    // using readdir().)
ASSERT(t, errno == EPERM) << "Unknown error in prlimit"; fd_limit = 128*1024; fd = 0; } else { fd_limit = limit.rlim_cur; fd = max(300, t->fd_table()->last_free_fd()); } bool searched_from_start = fd == 0; while (true) { if (fd >= fd_limit) { ASSERT(t, !searched_from_start) << "No free fds available"; fd = 0; searched_from_start = true; } char buf[PATH_MAX]; sprintf(buf, "/proc/%d/fd/%d", t->tid, fd); if (access(buf, F_OK) == -1 && errno == ENOENT) { t->fd_table()->set_last_free_fd(fd); return fd; } ++fd; } } template void RecordTask::init_buffers_arch() { ASSERT(this, as->syscallbuf_enabled()) << "Someone called rrcall_init_buffers with syscallbuf disabled?"; // NB: the tracee can't be interrupted with a signal while // we're processing the rrcall, because it's masked off all // signals. AutoRemoteSyscalls remote(this); // Arguments to the rrcall. remote_ptr> child_args = regs().orig_arg1(); auto args = read_mem(child_args); args.cloned_file_data_fd = -1; args.syscallbuf_size = syscallbuf_size = session().syscall_buffer_size(); KernelMapping syscallbuf_km = init_syscall_buffer(remote, nullptr); if (!syscallbuf_km.size()) { // Syscallbuf allocation failed. This should mean the child is dead, // but just in case, return an error. remote.regs().set_syscall_result(-ENOMEM); return; } args.syscallbuf_ptr = syscallbuf_child; if (syscallbuf_child != nullptr) { // This needs to be skipped if we couldn't allocate the buffer // since replaying only reads (and advances) the mmap record // if `args.syscallbuf_ptr != nullptr`. auto record_in_trace = trace_writer().write_mapped_region( this, syscallbuf_km, syscallbuf_km.fake_stat(), syscallbuf_km.fsname(), vector(), TraceWriter::RR_BUFFER_MAPPING); ASSERT(this, record_in_trace == TraceWriter::DONT_RECORD_IN_TRACE); } else { // This can fail, e.g. if the tracee died unexpectedly. LOG(debug) << "Syscallbuf initialization failed"; args.syscallbuf_size = 0; } if (args.syscallbuf_ptr) { desched_fd_child = args.desched_counter_fd; // Prevent the child from closing this fd fds->add_monitor(this, desched_fd_child, new PreserveFileMonitor()); desched_fd = remote.retrieve_fd(desched_fd_child); if (trace_writer().supports_file_data_cloning() && session().use_read_cloning()) { cloned_file_data_fname = trace_writer().file_data_clone_file_name(tuid()); ScopedFd clone_file(cloned_file_data_fname.c_str(), O_RDWR | O_CREAT, 0600); int cloned_file_data = remote.infallible_send_fd_if_alive(clone_file); if (cloned_file_data >= 0) { int free_fd = find_free_file_descriptor(this); cloned_file_data_fd_child = remote.syscall(syscall_number_for_dup3(arch()), cloned_file_data, free_fd, O_CLOEXEC); if (cloned_file_data_fd_child != free_fd) { ASSERT(this, cloned_file_data_fd_child < 0); LOG(warn) << "Couldn't dup clone-data file to free fd"; cloned_file_data_fd_child = cloned_file_data; } else { // Prevent the child from closing this fd. We're going to close it // ourselves and we don't want the child closing it and then reopening // its own file with this fd. fds->add_monitor(this, cloned_file_data_fd_child, new PreserveFileMonitor()); remote.infallible_close_syscall_if_alive(cloned_file_data); } args.cloned_file_data_fd = cloned_file_data_fd_child; } } } args.scratch_buf = scratch_ptr; args.usable_scratch_size = usable_scratch_size(); // Return the mapped buffers to the child. 
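// ---- editor's sketch (illustration only, not part of rr) -----------------
// Design note on find_free_file_descriptor() above: within one's own process
// the kernel can hand out the lowest free fd at or above a floor in a single
// call. rr cannot do that inside the tracee without running tracee code,
// hence the /proc/<pid>/fd probing. Hypothetical helper:
#include <fcntl.h>
#include <unistd.h>

static int lowest_free_fd_at_least(int floor_fd) {
  int fd = fcntl(0, F_DUPFD, floor_fd); // lowest free fd >= floor_fd
  if (fd >= 0) {
    close(fd); // we only wanted the number; it may be reused afterwards
  }
  return fd;
}
// ---- end editor's sketch --------------------------------------------------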
  write_mem(child_args, args);

  // The tracee doesn't need this addr returned, because it's
  // already written to the inout |args| param, but we stash it
  // away in the return value slot so that we can easily check
  // that we map the segment at the same addr during replay.
  remote.regs().set_syscall_result(syscallbuf_child);
}

void RecordTask::init_buffers() { RR_ARCH_FUNCTION(init_buffers_arch, arch()); }

template <typename Arch>
void RecordTask::on_syscall_exit_arch(int syscallno, const Registers& regs) {
  switch (syscallno) {
    // These syscalls affect the sigmask even if they fail.
    case Arch::epoll_pwait:
    case Arch::epoll_pwait2:
    case Arch::pselect6:
    case Arch::pselect6_time64:
    case Arch::ppoll:
    case Arch::ppoll_time64:
      invalidate_sigmask();
      break;
  }
  if (regs.original_syscallno() == SECCOMP_MAGIC_SKIP_ORIGINAL_SYSCALLNO ||
      regs.syscall_failed()) {
    return;
  }
  switch (syscallno) {
    case Arch::set_robust_list:
      set_robust_list(regs.orig_arg1(), (size_t)regs.arg2());
      return;
    case Arch::sigaction:
    case Arch::rt_sigaction:
      // TODO: SYS_signal
      update_sigaction(regs);
      return;
    case Arch::set_tid_address:
      set_tid_addr(regs.orig_arg1());
      return;
    case Arch::sigsuspend:
    case Arch::rt_sigsuspend:
    case Arch::sigprocmask:
    case Arch::rt_sigprocmask:
    case Arch::pselect6:
    case Arch::pselect6_time64:
    case Arch::ppoll:
    case Arch::ppoll_time64:
      invalidate_sigmask();
      return;
  }
}

void RecordTask::on_syscall_exit(int syscallno, SupportedArch arch,
                                 const Registers& regs) {
  with_converted_registers<void>(regs, arch, [&](const Registers& regs) {
    Task::on_syscall_exit(syscallno, arch, regs);
    RR_ARCH_FUNCTION(on_syscall_exit_arch, arch, syscallno, regs)
  });
}

bool RecordTask::is_at_syscallbuf_syscall_entry_breakpoint() {
  auto i = ip().undo_executed_bkpt(arch());
  for (auto p : syscallbuf_syscall_entry_breakpoints()) {
    if (i == p) {
      return true;
    }
  }
  return false;
}

bool RecordTask::is_at_syscallbuf_final_instruction_breakpoint() {
  if (!break_at_syscallbuf_final_instruction) {
    return false;
  }
  auto i = ip().undo_executed_bkpt(arch());
  return i == syscallbuf_code_layout.syscallbuf_final_exit_instruction;
}

bool RecordTask::is_at_syscallstub_exit_breakpoint() {
  if (!break_at_syscallbuf_final_instruction || !syscallstub_exit_breakpoint) {
    return false;
  }
  auto i = ip().undo_executed_bkpt(arch());
  return i == syscallstub_exit_breakpoint;
}

void RecordTask::will_resume_execution(ResumeRequest, WaitRequest,
                                       TicksRequest ticks_request, int sig) {
  // We may execute user code, which could lead to an RDTSC or grow-map
  // operation which unblocks SIGSEGV, and we'll need to know whether to
  // re-block it. So we need our cached sigmask to be up to date.
  // We don't need to do this if we're not going to execute user code
  // (i.e. ticks_request == RESUME_NO_TICKS) except that did_wait can't
  // easily check for that and may restore blocked_sigs so it had better be
  // accurate.
  get_sigmask();

  if (stashed_signals_blocking_more_signals) {
    // A stashed signal we have already accepted for this task may
    // have a sigaction::sa_mask that would block the next signal to be
    // delivered and cause it to be delivered to a different task. If we allow
    // such a signal to be delivered to this task then we run the risk of never
    // being able to process the signal (if it stays blocked indefinitely).
    // To prevent this, block any further signal delivery as long as there are
    // stashed signals.
    // We assume the kernel can't report a new signal of the same number
    // in response to us injecting a signal. XXX is this true??? We don't
    // have much choice, signal injection won't work if we block the signal.
    // We leave rr signals unblocked. TIME_SLICE_SIGNAL has to be unblocked
    // because blocking it seems to cause problems for some hardware/kernel
    // configurations (see https://github.com/rr-debugger/rr/issues/1979),
    // causing them to stop counting events.
    sig_set_t sigset = ~session().rr_signal_mask();
    if (sig) {
      // We're injecting a signal, so make sure that signal is unblocked.
      sigset &= ~signal_bit(sig);
    }
    set_sigmask(sigset);
  }

  // RESUME_NO_TICKS means that tracee code is not going to run so there's no
  // need to set breakpoints and in fact they might interfere with rr
  // processing.
  if (ticks_request != RESUME_NO_TICKS) {
    if (!at_may_restart_syscall()) {
      // If the tracee has SIGTRAP blocked or ignored and we hit one of these
      // breakpoints, the kernel will automatically unblock the signal and set
      // its disposition to DFL, effects which we ought to undo to keep these
      // SIGTRAPs invisible to tracees. Fixing the sigmask happens
      // automatically in did_wait(). Restoring the signal-ignored status is
      // handled in `handle_syscallbuf_breakpoint`.

      // Set breakpoints at untraced syscalls to catch us entering an untraced
      // syscall. We don't need to do this (and shouldn't do this) if the
      // execution requestor wants to stop inside untraced syscalls.
      // If we have an interrupted syscall that we may restart, don't
      // set the breakpoints because we should restart the syscall instead
      // of breaking and delivering signals. The syscallbuf code doesn't
      // (and must not) perform more than one blocking syscall for any given
      // buffered syscall.
      for (auto p : syscallbuf_syscall_entry_breakpoints()) {
        vm()->add_breakpoint(p, BKPT_INTERNAL);
      }
    }
    if (break_at_syscallbuf_final_instruction) {
      vm()->add_breakpoint(
          syscallbuf_code_layout.syscallbuf_final_exit_instruction,
          BKPT_INTERNAL);
      auto stub_bp =
          as->monkeypatcher().get_jump_stub_exit_breakpoint(ip(), this);
      if (stub_bp) {
        syscallstub_exit_breakpoint = stub_bp;
        vm()->add_breakpoint(stub_bp, BKPT_INTERNAL);
      }
    }
  }
}

vector<remote_code_ptr> RecordTask::syscallbuf_syscall_entry_breakpoints() {
  vector<remote_code_ptr> result;
  if (break_at_syscallbuf_untraced_syscalls) {
    result.push_back(AddressSpace::rr_page_syscall_entry_point(
        AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
        AddressSpace::RECORDING_ONLY, arch()));
    result.push_back(AddressSpace::rr_page_syscall_entry_point(
        AddressSpace::UNTRACED, AddressSpace::UNPRIVILEGED,
        AddressSpace::RECORDING_AND_REPLAY, arch()));
  }
  if (break_at_syscallbuf_traced_syscalls) {
    result.push_back(AddressSpace::rr_page_syscall_entry_point(
        AddressSpace::TRACED, AddressSpace::UNPRIVILEGED,
        AddressSpace::RECORDING_AND_REPLAY, arch()));
  }
  return result;
}

void RecordTask::did_wait() {
  for (auto p : syscallbuf_syscall_entry_breakpoints()) {
    vm()->remove_breakpoint(p, BKPT_INTERNAL);
  }
  if (break_at_syscallbuf_final_instruction) {
    vm()->remove_breakpoint(
        syscallbuf_code_layout.syscallbuf_final_exit_instruction,
        BKPT_INTERNAL);
  }
  if (syscallstub_exit_breakpoint) {
    vm()->remove_breakpoint(syscallstub_exit_breakpoint, BKPT_INTERNAL);
  }

  if (stashed_signals_blocking_more_signals) {
    // Saved 'blocked_sigs' must still be correct regardless of syscallbuf
    // state, because we do not allow stashed_signals_blocking_more_signals
    // to hold across syscalls (traced or untraced) that change the signal
    // mask.
    ASSERT(this, !blocked_sigs_dirty);
    // If this fails the tracee is on the exit path and its sigmask is
    // irrelevant.
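    // PTRACE_SETSIGMASK passes the mask size (8 bytes here) in the addr
    // argument and a pointer to the new mask in the data argument.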
    ptrace_if_stopped(PTRACE_SETSIGMASK, remote_ptr<void>(8), &blocked_sigs);
  } else if (syscallbuf_child) {
    // The syscallbuf struct is only 32 bytes currently so read the whole thing
    // at once to avoid multiple calls to read_mem. Even though this shouldn't
    // need a syscall because we use a local-mapping, apparently that lookup
    // is still noticeably expensive.
    auto syscallbuf = read_mem(syscallbuf_child);
    if (syscallbuf.in_sigprocmask_critical_section) {
      // |blocked_sigs| may have been updated but the syscall not yet issued.
      // Use the kernel's value.
      invalidate_sigmask();
    } else {
      uint32_t syscallbuf_generation = syscallbuf.blocked_sigs_generation;
      if (syscallbuf_generation > syscallbuf_blocked_sigs_generation) {
        syscallbuf_blocked_sigs_generation = syscallbuf_generation;
        blocked_sigs = syscallbuf.blocked_sigs;
      }
    }
  }
}

void RecordTask::set_emulated_ptracer(RecordTask* tracer) {
  if (tracer) {
    ASSERT(this, !emulated_ptracer);
    emulated_ptracer = tracer;
    emulated_ptracer->emulated_ptrace_tracees.insert(this);
  } else {
    ASSERT(this, emulated_ptracer);
    ASSERT(this, emulated_stop_type == NOT_STOPPED ||
                 emulated_stop_type == GROUP_STOP);
    emulated_ptracer->emulated_ptrace_tracees.erase(this);
    emulated_ptracer = nullptr;
  }
}

bool RecordTask::emulate_ptrace_stop(WaitStatus status,
                                     EmulatedStopType stop_type,
                                     const siginfo_t* siginfo, int si_code) {
  ASSERT(this, emulated_stop_type == NOT_STOPPED);
  if (!emulated_ptracer) {
    return false;
  }
  if (siginfo) {
    ASSERT(this, status.ptrace_signal() == siginfo->si_signo);
    save_ptrace_signal_siginfo(*siginfo);
  } else {
    siginfo_t si;
    memset(&si, 0, sizeof(si));
    si.si_signo = status.ptrace_signal();
    if (status.ptrace_event() || status.is_syscall()) {
      si.si_code = status.get() >> 8;
    } else {
      si.si_code = si_code;
    }
    save_ptrace_signal_siginfo(si);
  }
  force_emulate_ptrace_stop(status, stop_type);
  return true;
}

void RecordTask::force_emulate_ptrace_stop(WaitStatus status,
                                           EmulatedStopType stop_type) {
  emulated_stop_type = stop_type;
  emulated_stop_code = status;
  emulated_stop_pending = true;
  emulated_ptrace_SIGCHLD_pending = true;

  emulated_ptracer->send_synthetic_SIGCHLD_if_necessary();
  // The SIGCHLD will eventually be reported to rr via a ptrace stop,
  // interrupting wake_task's syscall (probably a waitpid) if necessary. At
  // that point, we'll fix up the siginfo data with values that match what
  // the kernel would have delivered for a real ptracer's SIGCHLD. When the
  // signal handler (if any) returns, if wake_task was in a blocking wait that
  // wait will be resumed, at which point rec_prepare_syscall_arch will
  // discover the pending ptrace result and emulate the wait syscall to
  // return that result immediately.
}

void RecordTask::do_ptrace_exit_stop(WaitStatus exit_status) {
  // Notify ptracer of the exit if it's not going to receive it from the
  // kernel because it's not the parent. (The kernel has similar logic to
  // deliver two stops in this case.)
  if (emulated_ptracer &&
      (is_clone_child() ||
       get_parent_pid() != emulated_ptracer->real_tgid())) {
    // The task is dead so treat it as not stopped so we can deliver a new stop
    emulated_stop_type = NOT_STOPPED;
    // This is a bit wrong; this is an exit stop, not a signal/ptrace stop.
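    // (emulate_ptrace_stop below defaults to a signal-delivery or group-stop
    // type, which is close enough for the emulated ptracer's wait to observe
    // the exit status.)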
    emulate_ptrace_stop(exit_status);
  }
}

void RecordTask::did_reach_zombie() {
  // Remove from address-space and fds list since we really aren't associated
  // with them anymore (and we can't be used to operate on them)
  as->erase_task(this);
  fds->erase_task(this);
  if (!was_reaped()) {
    if (may_reap()) {
      reap();
    } else {
      waiting_for_reap = true;
    }
  }
  if ((was_reaped() || !waiting_for_reap) && !emulated_stop_pending) {
    delete this;
  }
}

void RecordTask::set_stopped(bool stopped) {
  if (is_stopped_ == stopped) {
    return;
  }
  is_stopped_ = stopped;
  if (stopped) {
    session().scheduler().stopped_task(this);
  } else {
    session().scheduler().started_task(this);
  }
}

void RecordTask::send_synthetic_SIGCHLD_if_necessary() {
  RecordTask* wake_task = nullptr;
  bool need_signal = false;
  for (RecordTask* tracee : emulated_ptrace_tracees) {
    if (tracee->emulated_ptrace_SIGCHLD_pending) {
      need_signal = true;
      // check to see if any thread in the ptracer process is in a waitpid that
      // could read the status of 'tracee'. If it is, we should wake up that
      // thread. Otherwise we send SIGCHLD to the ptracer thread.
      for (Task* t : thread_group()->task_set()) {
        auto rt = static_cast<RecordTask*>(t);
        if (rt->is_waiting_for_ptrace(tracee)) {
          wake_task = rt;
          break;
        }
      }
      if (wake_task) {
        break;
      }
    }
  }
  if (!need_signal) {
    for (ThreadGroup* child_tg : thread_group()->children()) {
      for (Task* child : child_tg->task_set()) {
        RecordTask* rchild = static_cast<RecordTask*>(child);
        if (rchild->emulated_SIGCHLD_pending) {
          need_signal = true;
          // check to see if any thread in the ptracer process is in a waitpid
          // that could read the status of 'tracee'. If it is, we should wake
          // up that thread. Otherwise we send SIGCHLD to the ptracer thread.
          for (Task* t : thread_group()->task_set()) {
            auto rt = static_cast<RecordTask*>(t);
            if (rt->is_waiting_for(rchild)) {
              wake_task = rt;
              break;
            }
          }
          if (wake_task) {
            break;
          }
        }
      }
    }
    if (!need_signal) {
      return;
    }
  }

  // ptrace events trigger SIGCHLD in the ptracer's wake_task.
  // We can't set all the siginfo values to their correct values here, so
  // we'll patch this up when the signal is received.
  // If there's already a pending SIGCHLD, this signal will be ignored,
  // but at some point the pending SIGCHLD will be delivered and then
  // send_synthetic_SIGCHLD_if_necessary will be called again to deliver a new
  // SIGCHLD if necessary.
  siginfo_t si;
  memset(&si, 0, sizeof(si));
  si.si_code = SI_QUEUE;
  si.si_value.sival_int = SIGCHLD_SYNTHETIC;
  int ret;
  if (wake_task) {
    LOG(debug) << "Sending synthetic SIGCHLD to tid " << wake_task->tid;
    // We must use the raw SYS_rt_tgsigqueueinfo syscall here to ensure the
    // signal is sent to the correct thread by tid.
    ret = syscall(SYS_rt_tgsigqueueinfo, wake_task->tgid(), wake_task->tid,
                  SIGCHLD, &si);
    ASSERT(this, ret == 0);
    if (wake_task->is_sig_blocked(SIGCHLD)) {
      LOG(debug) << "SIGCHLD is blocked, kicking it out of the syscall";
      // Just sending SIGCHLD won't wake it up. Send it a TIME_SLICE_SIGNAL
      // as well to make sure it exits a blocking syscall. We ensure those
      // can never be blocked.
      // We have to send a negative code here because only the kernel can set
      // positive codes. We set a magic number so we can recognize it
      // when received.
      si.si_code = SYNTHETIC_TIME_SLICE_SI_CODE;
      ret = syscall(SYS_rt_tgsigqueueinfo, wake_task->tgid(), wake_task->tid,
                    PerfCounters::TIME_SLICE_SIGNAL, &si);
      ASSERT(this, ret == 0);
    }
  } else {
    // Send the signal to the process as a whole and let the kernel
    // decide which thread gets it.
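    // rt_sigqueueinfo targets a thread group (tgid), unlike the
    // rt_tgsigqueueinfo call above which targets a specific thread by tid.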
    ret = syscall(SYS_rt_sigqueueinfo, tgid(), SIGCHLD, &si);
    ASSERT(this, ret == 0);
    LOG(debug) << "Sending synthetic SIGCHLD to pid " << tgid();
  }
}

static bool is_synthetic_SIGCHLD(const siginfo_t& si) {
  return si.si_signo == SIGCHLD && si.si_value.sival_int == SIGCHLD_SYNTHETIC;
}

bool RecordTask::set_siginfo_for_synthetic_SIGCHLD(siginfo_t* si) {
  if (!is_synthetic_SIGCHLD(*si)) {
    return true;
  }

  if (is_syscall_restart() && EV_SYSCALL_INTERRUPTION == ev().type()) {
    int syscallno = regs().original_syscallno();
    SupportedArch syscall_arch = ev().Syscall().arch();
    if (is_waitpid_syscall(syscallno, syscall_arch) ||
        is_waitid_syscall(syscallno, syscall_arch) ||
        is_wait4_syscall(syscallno, syscall_arch)) {
      // Wait-like syscalls always check for notifications from waited-for
      // processes before they check for pending signals. So, if the tracee
      // has a pending notification that also generated a signal, the wait
      // syscall will return normally rather than returning with ERESTARTSYS
      // etc. (The signal will be dequeued and any handler run on the return
      // to userspace, however.)
      // We need to emulate this by deferring our synthetic ptrace signal
      // until after the wait syscall has returned.
      LOG(debug) << "Deferring signal because we're in a wait";
      // Return false to tell the caller to defer the signal and resume
      // the syscall.
      return false;
    }
  }

  for (RecordTask* tracee : emulated_ptrace_tracees) {
    if (tracee->emulated_ptrace_SIGCHLD_pending) {
      tracee->emulated_ptrace_SIGCHLD_pending = false;
      tracee->set_siginfo_for_waited_task<NativeArch>(
          reinterpret_cast<NativeArch::siginfo_t*>(si));
      si->si_value.sival_int = 0;
      return true;
    }
  }

  for (ThreadGroup* child_tg : thread_group()->children()) {
    for (Task* child : child_tg->task_set()) {
      auto rchild = static_cast<RecordTask*>(child);
      if (rchild->emulated_SIGCHLD_pending) {
        rchild->emulated_SIGCHLD_pending = false;
        rchild->set_siginfo_for_waited_task<NativeArch>(
            reinterpret_cast<NativeArch::siginfo_t*>(si));
        si->si_value.sival_int = 0;
        return true;
      }
    }
  }
  return true;
}

bool RecordTask::is_waiting_for_ptrace(RecordTask* t) {
  // This task's process must be a ptracer of t.
  if (!t->emulated_ptracer ||
      t->emulated_ptracer->thread_group() != thread_group()) {
    return false;
  }
  // XXX need to check |options| to make sure this task is eligible!!
  switch (in_wait_type) {
    case WAIT_TYPE_NONE:
      return false;
    case WAIT_TYPE_ANY:
      return true;
    case WAIT_TYPE_SAME_PGID:
      return getpgid(t->tgid()) == getpgid(tgid());
    case WAIT_TYPE_PGID:
      return getpgid(t->tgid()) == in_wait_pid;
    case WAIT_TYPE_PID:
      // When waiting for a ptracee, a specific pid is interpreted as the
      // exact tid.
      return t->tid == in_wait_pid;
    default:
      ASSERT(this, false);
      return false;
  }
}

bool RecordTask::is_waiting_for(RecordTask* t) {
  // t must be a child of this task.
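  // (Note the contrast with is_waiting_for_ptrace above: ordinary waits match
  // WAIT_TYPE_PID against the thread-group id, since wait calls operate on
  // processes, while a ptracer waiting on a specific pid matches the exact
  // tid.)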
  if (t->thread_group()->parent() != thread_group().get()) {
    return false;
  }
  switch (in_wait_type) {
    case WAIT_TYPE_NONE:
      return false;
    case WAIT_TYPE_ANY:
      return true;
    case WAIT_TYPE_SAME_PGID:
      return getpgid(t->tgid()) == getpgid(tgid());
    case WAIT_TYPE_PGID:
      return getpgid(t->tgid()) == in_wait_pid;
    case WAIT_TYPE_PID:
      return t->tgid() == in_wait_pid;
    default:
      ASSERT(this, false);
      return false;
  }
}

void RecordTask::save_ptrace_signal_siginfo(const siginfo_t& si) {
  for (auto it = saved_ptrace_siginfos.begin();
       it != saved_ptrace_siginfos.end(); ++it) {
    if (it->si_signo == si.si_signo) {
      saved_ptrace_siginfos.erase(it);
      break;
    }
  }
  saved_ptrace_siginfos.push_back(si);
}

siginfo_t& RecordTask::get_saved_ptrace_siginfo() {
  int sig = emulated_stop_code.ptrace_signal();
  ASSERT(this, sig > 0);
  for (auto it = saved_ptrace_siginfos.begin();
       it != saved_ptrace_siginfos.end(); ++it) {
    if (it->si_signo == sig) {
      return *it;
    }
  }
  ASSERT(this, false) << "No saved siginfo found for stop-signal???";
  while (true) {
    // Avoid having to return anything along this (unreachable) path
  }
}

siginfo_t RecordTask::take_ptrace_signal_siginfo(int sig) {
  for (auto it = saved_ptrace_siginfos.begin();
       it != saved_ptrace_siginfos.end(); ++it) {
    if (it->si_signo == sig) {
      siginfo_t si = *it;
      saved_ptrace_siginfos.erase(it);
      return si;
    }
  }
  siginfo_t si;
  memset(&si, 0, sizeof(si));
  si.si_signo = sig;
  return si;
}

static pid_t get_ppid(pid_t pid) {
  auto ppid_str = read_proc_status_fields(pid, "PPid");
  if (ppid_str.empty()) {
    return -1;
  }
  char* end;
  int actual_ppid = strtol(ppid_str[0].c_str(), &end, 10);
  return *end ? -1 : actual_ppid;
}

void RecordTask::apply_group_stop(int sig) {
  if (emulated_stop_type == NOT_STOPPED) {
    LOG(debug) << "setting " << tid << " to GROUP_STOP due to signal " << sig;
    WaitStatus status = WaitStatus::for_group_sig(sig, this);
    if (!emulate_ptrace_stop(status)) {
      emulated_stop_type = GROUP_STOP;
      emulated_stop_code = status;
      emulated_stop_pending = true;
      emulated_SIGCHLD_pending = true;
      RecordTask* t = session().find_task(get_ppid(tid));
      if (t) {
        t->send_synthetic_SIGCHLD_if_necessary();
      }
    }
  }
}

bool RecordTask::is_signal_pending(int sig) {
  auto pending_strs = read_proc_status_fields(tid, "SigPnd", "ShdPnd");
  if (pending_strs.size() < 2) {
    return false;
  }
  char* end1;
  sig_set_t mask1 = strtoull(pending_strs[0].c_str(), &end1, 16);
  char* end2;
  sig_set_t mask2 = strtoull(pending_strs[1].c_str(), &end2, 16);
  return !*end1 && !*end2 && ((mask1 | mask2) & signal_bit(sig));
}

bool RecordTask::has_any_actionable_signal() {
  auto sig_strs = read_proc_status_fields(tid, "SigPnd", "ShdPnd", "SigBlk");
  if (sig_strs.size() < 3) {
    return false;
  }
  char* end1;
  uint64_t mask1 = strtoull(sig_strs[0].c_str(), &end1, 16);
  char* end2;
  uint64_t mask2 = strtoull(sig_strs[1].c_str(), &end2, 16);
  char* end3;
  uint64_t mask_blk = strtoull(sig_strs[2].c_str(), &end3, 16);
  return !*end1 && !*end2 && !*end3 && ((mask1 | mask2) & ~mask_blk);
}

void RecordTask::emulate_SIGCONT() {
  // All threads in the process are resumed.
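  // SIGCONT clears group stops for every thread in the thread group whether
  // or not a handler runs, so clear the emulated stop state on all of them.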
  for (Task* t : thread_group()->task_set()) {
    auto rt = static_cast<RecordTask*>(t);
    LOG(debug) << "setting " << tid << " to NOT_STOPPED due to SIGCONT";
    rt->clear_stashed_group_stop();
    rt->emulated_stop_pending = false;
    rt->emulated_stop_type = NOT_STOPPED;
  }
}

void RecordTask::signal_delivered(int sig) {
  Sighandler& h = sighandlers->get(sig);
  if (h.resethand) {
    reset_handler(&h, arch());
  }

  if (!is_sig_ignored(sig)) {
    switch (sig) {
      case SIGTSTP:
      case SIGTTIN:
      case SIGTTOU:
        if (h.disposition() == SIGNAL_HANDLER) {
          break;
        }
        RR_FALLTHROUGH;
      case SIGSTOP:
        // All threads in the process are stopped.
        for (Task* t : thread_group()->task_set()) {
          auto rt = static_cast<RecordTask*>(t);
          rt->apply_group_stop(sig);
        }
        break;
      case SIGCONT:
        emulate_SIGCONT();
        break;
    }
  }

  send_synthetic_SIGCHLD_if_necessary();
}

bool RecordTask::signal_has_user_handler(int sig) const {
  return sighandlers->get(sig).disposition() == SIGNAL_HANDLER;
}

remote_code_ptr RecordTask::get_signal_user_handler(int sig) const {
  return sighandlers->get(sig).get_user_handler();
}

const vector<uint8_t>& RecordTask::signal_action(int sig) const {
  return sighandlers->get(sig).sa;
}

bool RecordTask::signal_handler_takes_siginfo(int sig) const {
  return sighandlers->get(sig).takes_siginfo;
}

static bool is_unstoppable_signal(int sig) {
  return sig == SIGSTOP || sig == SIGKILL;
}

bool RecordTask::is_sig_blocked(int sig) {
  if (is_unstoppable_signal(sig)) {
    // These can never be blocked
    return false;
  }
  int sig_bit = sig - 1;
  return (get_sigmask() >> sig_bit) & 1;
}

bool RecordTask::is_sig_ignored(int sig) const {
  if (is_unstoppable_signal(sig)) {
    // These can never be ignored
    return false;
  }
  switch (sighandlers->get(sig).disposition()) {
    case SIGNAL_IGNORE:
      return true;
    case SIGNAL_DEFAULT:
      return IGNORE == default_action(sig);
    default:
      return false;
  }
}

SignalDisposition RecordTask::sig_disposition(int sig) const {
  return sighandlers->get(sig).disposition();
}

SignalResolvedDisposition RecordTask::sig_resolved_disposition(
    int sig, SignalDeterministic deterministic) {
  if (is_fatal_signal(sig, deterministic)) {
    return DISPOSITION_FATAL;
  }
  if (signal_has_user_handler(sig) && !is_sig_blocked(sig)) {
    return DISPOSITION_USER_HANDLER;
  }
  return DISPOSITION_IGNORED;
}

void RecordTask::set_siginfo(const siginfo_t& si) {
  pending_siginfo = si;
  // If this fails, the tracee is on the exit path and its siginfo
  // is irrelevant.
  ptrace_if_stopped(PTRACE_SETSIGINFO, nullptr, (void*)&si);
}

template <typename Arch>
void RecordTask::update_sigaction_arch(const Registers& regs) {
  int sig = regs.orig_arg1_signed();
  remote_ptr<typename Arch::kernel_sigaction> new_sigaction = regs.arg2();
  if (0 == regs.syscall_result() && !new_sigaction.is_null()) {
    // A new sighandler was installed. Update our
    // sighandler table.
    // TODO: discard attempts to handle or ignore signals
    // that can't be by POSIX
    typename Arch::kernel_sigaction sa;
    memset(&sa, 0, sizeof(sa));
    read_bytes_helper(new_sigaction, sizeof(sa), &sa);
    sighandlers->get(sig).init_arch<Arch>(sa);
  }
}

void RecordTask::update_sigaction(const Registers& regs) {
  RR_ARCH_FUNCTION(update_sigaction_arch, regs.arch(), regs);
}

sig_set_t RecordTask::read_sigmask_from_process() {
  // During syscall interruptions, PTRACE_GETSIGMASK may return the sigmask
  // that is going to be restored, not the kernel's current (internal)
  // sigmask, which is what /proc/.../status reports. Always go with what
  // /proc/.../status reports. See
  // https://github.com/torvalds/linux/commit/fcfc2aa0185f4a731d05a21e9f359968fdfd02e7
  // XXXkhuey and yet that's not what we actually do here ...
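  // (Reading of the logic below: only trust the fast PTRACE_GETSIGMASK path
  // when no interrupted syscall could be about to restore a different mask;
  // otherwise fall back to parsing SigBlk from /proc/<tid>/status.)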
  if (at_interrupted_non_restartable_signal_modifying_syscall()) {
    // Mark the sigmask as already invalid. The moment we exit the kernel and
    // run more of the tracee the sigmask will change, so we need to keep
    // refetching the sigmask until that happens.
    invalidate_sigmask();
  } else if (!at_may_restart_syscall()) {
    sig_set_t mask;
    long ret = fallible_ptrace(PTRACE_GETSIGMASK,
                               remote_ptr<void>(sizeof(sig_set_t)), &mask);
    if (ret >= 0) {
      return mask;
    }
  }

  auto results = read_proc_status_fields(tid, "SigBlk");
  if (results.empty()) {
    // Read failed, process probably died
    return 0;
  }
  ASSERT(this, results.size() == 1);
  return strtoull(results[0].c_str(), NULL, 16);
}

sig_set_t RecordTask::get_sigmask() {
  if (blocked_sigs_dirty) {
    // Clear this first, read_sigmask_from_process might set it again.
    blocked_sigs_dirty = false;
    blocked_sigs = read_sigmask_from_process();
    LOG(debug) << "Refreshed sigmask, now " << HEX(blocked_sigs);
  }
  return blocked_sigs;
}

void RecordTask::unblock_signal(int sig) {
  sig_set_t mask = get_sigmask();
  mask &= ~signal_bit(sig);
  set_sigmask(mask);
  invalidate_sigmask();
}

void RecordTask::set_sigmask(sig_set_t mask) {
  ASSERT(this, is_stopped_);
  int ret = fallible_ptrace(PTRACE_SETSIGMASK, remote_ptr<void>(8), &mask);
  if (ret < 0) {
    if (errno == EIO) {
      FATAL() << "PTRACE_SETSIGMASK not supported; rr requires Linux kernel >= 3.11";
    }
    if (errno == ESRCH) {
      // The task has been unexpectedly killed due to SIGKILL or equivalent.
      // Just pretend we set the mask; it doesn't matter anymore.
      // Reporting this to the caller is pointless because callers still need
      // to handle the case where this function succeeds but the task
      // is unexpectedly killed immediately afterwards.
      return;
    }
    ASSERT(this, errno == EINVAL);
  } else {
    LOG(debug) << "Set signal mask to block all signals (bar "
               << "SYSCALLBUF_DESCHED_SIGNAL/TIME_SLICE_SIGNAL) while we "
               << " have a stashed signal";
  }
}

void RecordTask::set_sig_handler_default(int sig) {
  did_set_sig_handler_default(sig);
  // This could happen during a syscallbuf untraced syscall. In that case
  // our remote syscall here could trigger a desched signal if that event
  // is armed, making progress impossible. Disarm the event now.
  disarm_desched_event(this);
  AutoRemoteSyscalls remote(this);
  Sighandler& h = sighandlers->get(sig);
  AutoRestoreMem mem(remote, h.sa.data(), h.sa.size());
  remote.infallible_syscall(syscall_number_for_rt_sigaction(arch()), sig,
                            mem.get().as_int(), nullptr,
                            sigaction_sigset_size(arch()));
}

void RecordTask::did_set_sig_handler_default(int sig) {
  Sighandler& h = sighandlers->get(sig);
  reset_handler(&h, arch());
}

void RecordTask::verify_signal_states() {
#ifndef DEBUG
  return;
#endif
  if (ev().is_syscall_event()) {
    // If the syscall event is on the event stack with PROCESSING or EXITING
    // states, we won't have applied the signal-state updates yet while the
    // kernel may have.
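    // Skip verification here rather than risk asserting on a transient
    // mismatch.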
    return;
  }
  if (detached_proxy) {
    // This task isn't real
    return;
  }
  auto results = read_proc_status_fields(tid, "SigBlk", "SigIgn", "SigCgt");
  if (results.empty()) {
    // Read failed, process probably died
    return;
  }
  ASSERT(this, results.size() == 3);
  sig_set_t blocked = strtoull(results[0].c_str(), NULL, 16);
  sig_set_t ignored = strtoull(results[1].c_str(), NULL, 16);
  sig_set_t caught = strtoull(results[2].c_str(), NULL, 16);
  for (int sig = 1; sig < _NSIG; ++sig) {
    sig_set_t mask = signal_bit(sig);
    if (is_unstoppable_signal(sig)) {
      ASSERT(this, !(blocked & mask))
          << "Expected " << signal_name(sig) << " to not be blocked, but it is";
      ASSERT(this, !(ignored & mask))
          << "Expected " << signal_name(sig) << " to not be ignored, but it is";
      ASSERT(this, !(caught & mask))
          << "Expected " << signal_name(sig) << " to not be caught, but it is";
    } else {
      ASSERT(this, !!(blocked & mask) == is_sig_blocked(sig))
          << signal_name(sig)
          << ((blocked & mask) ? " is blocked" : " is not blocked");
      if (sig == SIGCHLD && is_container_init() && (ignored & mask)) {
        // pid-1-in-its-own-pid-namespace tasks can have their SIGCHLD set
        // to "ignore" when they die (in zap_pid_ns_processes). We may
        // not have observed anything relating to this death yet. We could
        // probe to ensure it's already marked as a zombie but why bother.
        // XXX arguably we should actually change our disposition here but
        // it would only matter in certain very weird cases: a vfork() where
        // the child process is pid-1 in its namespace.
        continue;
      }
      auto disposition = sighandlers->get(sig).disposition();
      ASSERT(this, !!(ignored & mask) == (disposition == SIGNAL_IGNORE))
          << signal_name(sig)
          << ((ignored & mask) ? " is ignored" : " is not ignored");
      ASSERT(this, !!(caught & mask) == (disposition == SIGNAL_HANDLER))
          << signal_name(sig)
          << ((caught & mask) ? " is caught" : " is not caught");
    }
  }
}

void RecordTask::stash_sig() {
  int sig = stop_sig();
  ASSERT(this, sig);
  // Callers should avoid passing SYSCALLBUF_DESCHED_SIGNAL in here.
  ASSERT(this, sig != session().syscallbuf_desched_sig());

  // multiple non-RT signals coalesce
  if (sig < SIGRTMIN) {
    for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) {
      if (it->siginfo.si_signo == sig) {
        LOG(debug) << "discarding stashed signal " << sig
                   << " since we already have one pending";
        return;
      }
    }
  }

  const siginfo_t& si = get_siginfo();
  stashed_signals.push_back(
      StashedSignal(si, is_deterministic_signal(this), ip()));
  // Once we've stashed a signal, stop at the next traced/untraced syscall to
  // check whether we need to process the signal before it runs.
  stashed_signals_blocking_more_signals =
      break_at_syscallbuf_final_instruction =
          break_at_syscallbuf_traced_syscalls =
              break_at_syscallbuf_untraced_syscalls = true;
  syscallstub_exit_breakpoint = nullptr;
}

void RecordTask::stash_synthetic_sig(const siginfo_t& si,
                                     SignalDeterministic deterministic) {
  int sig = si.si_signo;
  DEBUG_ASSERT(sig);
  // Callers should avoid passing SYSCALLBUF_DESCHED_SIGNAL in here.
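  // (Desched signals are internal to the syscallbuf machinery and are handled
  // as they arrive; stashing one for later delivery would be a logic error.)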
  DEBUG_ASSERT(sig != session().syscallbuf_desched_sig());

  // multiple non-RT signals coalesce
  if (sig < SIGRTMIN) {
    for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) {
      if (it->siginfo.si_signo == sig) {
        if (deterministic == DETERMINISTIC_SIG &&
            it->deterministic == NONDETERMINISTIC_SIG) {
          stashed_signals.erase(it);
          break;
        } else {
          LOG(debug) << "discarding stashed signal " << sig
                     << " since we already have one pending";
          return;
        }
      }
    }
  }

  stashed_signals.insert(stashed_signals.begin(),
                         StashedSignal(si, deterministic, ip()));
  stashed_signals_blocking_more_signals =
      break_at_syscallbuf_final_instruction =
          break_at_syscallbuf_traced_syscalls =
              break_at_syscallbuf_untraced_syscalls = true;
  syscallstub_exit_breakpoint = nullptr;
}

bool RecordTask::has_stashed_sig(int sig) const {
  for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) {
    if (it->siginfo.si_signo == sig) {
      return true;
    }
  }
  return false;
}

const RecordTask::StashedSignal* RecordTask::stashed_sig_not_synthetic_SIGCHLD()
    const {
  for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) {
    if (!is_synthetic_SIGCHLD(it->siginfo)) {
      return &*it;
    }
  }
  return nullptr;
}

void RecordTask::pop_stash_sig(const StashedSignal* stashed) {
  for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) {
    if (&*it == stashed) {
      stashed_signals.erase(it);
      return;
    }
  }
  ASSERT(this, false) << "signal not found";
}

void RecordTask::stashed_signal_processed() {
  break_at_syscallbuf_final_instruction = break_at_syscallbuf_traced_syscalls =
      break_at_syscallbuf_untraced_syscalls =
          stashed_signals_blocking_more_signals = has_stashed_sig();
  syscallstub_exit_breakpoint = nullptr;
}

const RecordTask::StashedSignal* RecordTask::peek_stashed_sig_to_deliver()
    const {
  if (stashed_signals.empty()) {
    return nullptr;
  }
  // Choose the first non-synthetic-SIGCHLD signal so that if a syscall should
  // be interrupted, we'll interrupt it.
  for (auto& sig : stashed_signals) {
    if (!is_synthetic_SIGCHLD(sig.siginfo)) {
      return &sig;
    }
  }
  return &stashed_signals[0];
}

bool RecordTask::is_syscall_restart() {
  if (EV_SYSCALL_INTERRUPTION != ev().type()) {
    return false;
  }
  int syscallno = regs().original_syscallno();
  SupportedArch syscall_arch = ev().Syscall().arch();
  string call_name = syscall_name(syscallno, syscall_arch);
  bool is_restart = false;

  LOG(debug) << " is syscall interruption of recorded " << ev() << "? (now "
             << call_name << ")";

  /* It's possible for the tracee to resume after a sighandler
   * with a fresh syscall that happens to be the same as the one
   * that was interrupted. So we check here if the args are the
   * same.
   *
   * Of course, it's possible (but less likely) for the tracee
   * to incidentally resume with a fresh syscall that just
   * happens to have the same *arguments* too. But in that
   * case, we would usually set up scratch buffers etc the same
   * way as for the original interrupted syscall, so we just
   * save a step here.
   *
   * TODO: it's possible for arg structures to be mutated
   * between the original call and restarted call in such a way
   * that it might change the scratch allocation decisions.
   */
  if (is_restart_syscall_syscall(syscallno, syscall_arch)) {
    is_restart = true;
    syscallno = ev().Syscall().number;
    LOG(debug) << " (SYS_restart_syscall)";
  }
  if (ev().Syscall().number != syscallno) {
    LOG(debug) << " interrupted " << ev() << " != " << call_name;
    goto done;
  }

  {
    const Registers& old_regs = ev().Syscall().regs;
    if (!(old_regs.orig_arg1() == regs().arg1() &&
          old_regs.arg2() == regs().arg2() &&
          old_regs.arg3() == regs().arg3() &&
          old_regs.arg4() == regs().arg4() &&
          old_regs.arg5() == regs().arg5() &&
          old_regs.arg6() == regs().arg6())) {
      LOG(debug) << " regs different at interrupted " << call_name << ": "
                 << old_regs << " vs " << regs();
      goto done;
    }
  }

  is_restart = true;

done:
  if (is_restart) {
    LOG(debug) << " restart of " << call_name;
  }
  return is_restart;
}

template <typename Arch>
static uint64_t read_ptr_arch(Task* t, remote_ptr<void> p, bool* ok) {
  return t->read_mem(p.cast<typename Arch::unsigned_word>(), ok);
}

static uint64_t read_ptr(Task* t, remote_ptr<void> p, bool* ok) {
  RR_ARCH_FUNCTION(read_ptr_arch, t->arch(), t, p, ok);
}

bool RecordTask::is_in_syscallbuf() {
  if (!as->syscallbuf_enabled()) {
    // Even if we're in the rr page, if syscallbuf isn't enabled then the
    // rr page is not being used by syscallbuf.
    return false;
  }
  remote_code_ptr p = ip();
  if (is_in_rr_page() || (syscallbuf_code_layout.get_pc_thunks_start <= p &&
                          p < syscallbuf_code_layout.get_pc_thunks_end)) {
    // Look at the caller to see if we're in the syscallbuf or not.
    bool ok = true;
    uint64_t addr;
    if (arch() == aarch64) {
      addr = regs().xlr();
    } else {
      ASSERT(this, is_x86ish(arch())) << "Unknown architecture";
      addr = read_ptr(this, regs().sp(), &ok);
    }
    if (ok) {
      p = addr;
    }
  }
  return as->monkeypatcher().is_jump_stub_instruction(p, false) ||
         (syscallbuf_code_layout.syscallbuf_code_start <= p &&
          p < syscallbuf_code_layout.syscallbuf_code_end);
}

bool RecordTask::at_may_restart_syscall() const {
  ssize_t depth = pending_events.size();
  const Event* prev_ev = depth > 2 ? &pending_events[depth - 2] : nullptr;
  return EV_SYSCALL_INTERRUPTION == ev().type() ||
         (EV_SIGNAL_DELIVERY == ev().type() && prev_ev &&
          EV_SYSCALL_INTERRUPTION == prev_ev->type());
}

bool RecordTask::at_interrupted_non_restartable_signal_modifying_syscall()
    const {
  auto r = regs();
  // XXXkhuey io_uring_enter (not yet supported) can do this too.
  return r.syscall_result_signed() == -EINTR &&
         (is_epoll_pwait_syscall(r.original_syscallno(), arch()) ||
          is_epoll_pwait2_syscall(r.original_syscallno(), arch()));
}

bool RecordTask::is_arm_desched_event_syscall() {
  return is_desched_event_syscall() && PERF_EVENT_IOC_ENABLE == regs().arg2();
}

bool RecordTask::is_disarm_desched_event_syscall() {
  return (is_desched_event_syscall() &&
          PERF_EVENT_IOC_DISABLE == regs().arg2());
}

bool RecordTask::may_be_blocked() const {
  return (EV_SYSCALL == ev().type() &&
          PROCESSING_SYSCALL == ev().Syscall().state) ||
         emulated_stop_type != NOT_STOPPED ||
         (EV_SIGNAL_DELIVERY == ev().type() &&
          DISPOSITION_FATAL == ev().Signal().disposition) ||
         waiting_for_ptrace_exit;
}

bool RecordTask::maybe_in_spinlock() {
  return time_at_start_of_last_timeslice == session().trace_writer().time() &&
         regs().matches(registers_at_start_of_last_timeslice);
}

remote_ptr<const struct syscallbuf_record> RecordTask::desched_rec() const {
  return (ev().is_syscall_event()
              ? ev().Syscall().desched_rec
              : (EV_DESCHED == ev().type()) ?
                    ev().Desched().rec : nullptr);
}

bool RecordTask::running_inside_desched() const {
  for (auto& e : pending_events) {
    if (e.type() == EV_DESCHED) {
      return e.Desched().rec != desched_rec();
    }
  }
  return false;
}

int RecordTask::get_ptrace_eventmsg_seccomp_data() {
  unsigned long data = 0;
  // in theory we could hit an assertion failure if the tracee suffers
  // a SIGKILL before we get here. But the SIGKILL would have to be
  // precisely timed between the generation of a PTRACE_EVENT_FORK/CLONE/
  // SYS_clone event, and us fetching the event message here.
  if (fallible_ptrace(PTRACE_GETEVENTMSG, nullptr, &data) < 0) {
    ASSERT(this, errno == ESRCH);
    return -1;
  }
  return data;
}

void RecordTask::record_local(remote_ptr<void> addr, ssize_t num_bytes,
                              const void* data) {
  ASSERT(this, num_bytes >= 0);
  if (!addr) {
    return;
  }
  trace_writer().write_raw(rec_tid, data, num_bytes, addr);
}

bool RecordTask::record_remote_by_local_map(remote_ptr<void> addr,
                                            size_t num_bytes) {
  if (uint8_t* local_addr = as->local_mapping(addr, num_bytes)) {
    record_local(addr, num_bytes, local_addr);
    return true;
  }
  return false;
}

void RecordTask::record_remote(remote_ptr<void> addr, ssize_t num_bytes) {
  ASSERT(this, num_bytes >= 0);
  if (!addr) {
    return;
  }

  if (record_remote_by_local_map(addr, num_bytes) != 0) {
    return;
  }

  bool ok = true;
  auto buf = read_mem(addr.cast<uint8_t>(), num_bytes, &ok);
  if (!ok) {
    // Tracee probably died unexpectedly. This should only happen
    // due to SIGKILL racing with our PTRACE_CONT.
    if (!vm()->find_other_thread_group(this) &&
        vm()->range_is_private_mapping(MemoryRange(addr, num_bytes))) {
      // The recording range is mapped private and no other threadgroup shares
      // the address space, so the new memory contents should be unobservable,
      // and we can just not record the data.
      return;
    }
    ASSERT(this, false) << "Should have recorded " << num_bytes
                        << " bytes from " << addr << ", but failed";
  }
  trace_writer().write_raw(rec_tid, buf.data(), num_bytes, addr);
}

void RecordTask::record_remote_writable(remote_ptr<void> addr,
                                        ssize_t num_bytes) {
  ASSERT(this, num_bytes >= 0);

  remote_ptr<void> p = addr;
  bool seen_rr_mapping = false;
  int mapping_count = 0;
  while (p < addr + num_bytes) {
    if (!as->has_mapping(p)) {
      break;
    }
    ++mapping_count;
    auto m = as->mapping_of(p);
    if (m.flags) {
      seen_rr_mapping = true;
    }
    if (!(m.map.prot() & PROT_WRITE) ||
        (seen_rr_mapping && mapping_count > 1)) {
      break;
    }
    p = m.map.end();
  }
  num_bytes = min(num_bytes, p - addr);

  record_remote(addr, num_bytes);
}

ssize_t RecordTask::record_remote_fallible(remote_ptr<void> addr,
                                           uintptr_t num_bytes,
                                           const std::vector<WriteHole>& holes) {
  auto hole_iter = holes.begin();
  uintptr_t offset = 0;
  vector<uint8_t> buf;
  while (offset < num_bytes) {
    if (hole_iter != holes.end() && hole_iter->offset == offset) {
      offset += hole_iter->size;
      ++hole_iter;
      continue;
    }
    uintptr_t bytes = min(uintptr_t(4*1024*1024), num_bytes - offset);
    if (hole_iter != holes.end()) {
      ASSERT(this, hole_iter->offset > offset);
      bytes = min(bytes, uintptr_t(hole_iter->offset) - offset);
    }
    if (record_remote_by_local_map(addr + offset, bytes)) {
      offset += bytes;
      continue;
    }
    if (addr) {
      buf.resize(bytes);
      ssize_t nread = read_bytes_fallible(addr + offset, bytes, buf.data());
      if (nread <= 0) {
        break;
      }
      trace_writer().write_raw_data(buf.data(), nread);
      offset += nread;
    } else {
      offset += bytes;
    }
  }
  trace_writer().write_raw_header(rec_tid, offset, addr, holes);
  return offset;
}

void RecordTask::record_remote_even_if_null(remote_ptr<void> addr,
                                            ssize_t num_bytes) {
  DEBUG_ASSERT(num_bytes >= 0);

  if (!addr) {
    trace_writer().write_raw(rec_tid, nullptr, 0, addr);
    return;
  }
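  // A shared local mapping lets us copy the bytes without a remote read;
  // otherwise fall back to reading the tracee's memory.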
  if (record_remote_by_local_map(addr, num_bytes) != 0) {
    return;
  }

  auto buf = read_mem(addr.cast<uint8_t>(), num_bytes);
  trace_writer().write_raw(rec_tid, buf.data(), num_bytes, addr);
}

void RecordTask::pop_event(EventType expected_type) {
  ASSERT(this, pending_events.back().type() == expected_type);
  pending_events.pop_back();
}

void RecordTask::log_pending_events() const {
  ssize_t depth = pending_events.size();
  DEBUG_ASSERT(depth > 0);
  if (1 == depth) {
    LOG(info) << "(no pending events)";
    return;
  }

  /* The event at depth 0 is the placeholder event, which isn't
   * useful to log. Skip it. */
  for (auto it = pending_events.rbegin(); it != pending_events.rend(); ++it) {
    LOG(info) << *it;
  }
}

template <typename Arch>
static void maybe_handle_set_robust_list_arch(RecordTask* t) {
  auto remote_locals = AddressSpace::preload_thread_locals_start()
                           .cast<preload_thread_locals<Arch>>();
  if (!remote_locals) {
    return;
  }
  auto robust_list_ptr = REMOTE_PTR_FIELD(remote_locals, robust_list);
  auto robust_list = t->read_mem(robust_list_ptr);
  if (robust_list.len) {
    t->set_robust_list(robust_list.head.rptr(), robust_list.len);
    auto robust_list_len_ptr = REMOTE_PTR_FIELD(robust_list_ptr, len);
    t->write_mem(robust_list_len_ptr, static_cast<typename Arch::size_t>(0));
    robust_list.len = 0;
    t->record_local(robust_list_len_ptr, &robust_list.len);
  }
}

static void maybe_handle_set_robust_list(RecordTask* t) {
  RR_ARCH_FUNCTION(maybe_handle_set_robust_list_arch, t->arch(), t);
}

template <typename Arch> static void maybe_handle_rseq_arch(RecordTask* t) {
  auto remote_locals = AddressSpace::preload_thread_locals_start()
                           .cast<preload_thread_locals<Arch>>();
  if (!remote_locals) {
    return;
  }
  auto rseq_ptr = REMOTE_PTR_FIELD(remote_locals, rseq);
  auto rseq = t->read_mem(rseq_ptr);
  if (rseq.len) {
    t->rseq_state = make_unique<RseqState>(rseq.rseq.rptr(), rseq.sig);
    auto rseq_len_ptr = REMOTE_PTR_FIELD(rseq_ptr, len);
    t->write_mem(rseq_len_ptr, static_cast<typename Arch::size_t>(0));
    rseq.len = 0;
    t->record_local(rseq_len_ptr, &rseq.len);
  }
}

static void maybe_handle_rseq(RecordTask* t) {
  RR_ARCH_FUNCTION(maybe_handle_rseq_arch, t->arch(), t);
}

void RecordTask::maybe_flush_syscallbuf() {
  if (EV_SYSCALLBUF_FLUSH == ev().type()) {
    // Already flushing.
    return;
  }
  if (!syscallbuf_child) {
    return;
  }

  // This can be called while the task is not stopped, when we prematurely
  // terminate the trace. In that case, the tracee could be concurrently
  // modifying the header. We'll take a snapshot of the header now.
  // The syscallbuf code ensures that writes to syscallbuf records
  // complete before num_rec_bytes is incremented.
  struct syscallbuf_hdr hdr = read_mem(syscallbuf_child);

  ASSERT(this,
         !flushed_syscallbuf || flushed_num_rec_bytes == hdr.num_rec_bytes);

  if (!hdr.num_rec_bytes || flushed_syscallbuf) {
    // no records, or we've already flushed.
    return;
  }

  push_event(Event(SyscallbufFlushEvent()));

  // Apply buffered mprotect operations and flush the buffer in the tracee.
  if (hdr.mprotect_record_count) {
    auto& records = ev().SyscallbufFlush().mprotect_records;
    records = read_mem(REMOTE_PTR_FIELD(preload_globals, mprotect_records[0]),
                       hdr.mprotect_record_count);
    for (auto& r : records) {
      as->protect(this, r.start, r.size, r.prot);
    }
  }

  // Write the entire buffer in one shot without parsing it,
  // because replay will take care of that.
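  // If the task isn't stopped, avoid re-reading the (possibly racing) header:
  // build the record from the snapshot taken above instead.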
  if (is_stopped()) {
    record_remote(syscallbuf_child, syscallbuf_data_size());
  } else {
    vector<uint8_t> buf;
    buf.resize(sizeof(hdr) + hdr.num_rec_bytes);
    memcpy(buf.data(), &hdr, sizeof(hdr));
    read_bytes_helper(syscallbuf_child + 1, hdr.num_rec_bytes,
                      buf.data() + sizeof(hdr));
    record_local(syscallbuf_child, buf.size(), buf.data());
  }
  maybe_handle_rseq(this);
  maybe_handle_set_robust_list(this);
  record_current_event();
  pop_event(EV_SYSCALLBUF_FLUSH);

  flushed_syscallbuf = true;
  flushed_num_rec_bytes = hdr.num_rec_bytes;

  LOG(debug) << "Syscallbuf flushed with num_rec_bytes="
             << (uint32_t)hdr.num_rec_bytes;
}

/**
 * If the syscallbuf has just been flushed, and resetting hasn't been
 * overridden with a delay request, then record the reset event for
 * replay.
 */
void RecordTask::maybe_reset_syscallbuf() {
  if (flushed_syscallbuf && !delay_syscallbuf_reset_for_desched &&
      !delay_syscallbuf_reset_for_seccomp_trap) {
    flushed_syscallbuf = false;
    LOG(debug) << "Syscallbuf reset";
    reset_syscallbuf();
    syscallbuf_blocked_sigs_generation = 0;
    record_event(Event::syscallbuf_reset());
  }
}

void RecordTask::record_event(Event ev, FlushSyscallbuf flush,
                              AllowSyscallbufReset reset,
                              const Registers* registers) {
  if (flush == FLUSH_SYSCALLBUF) {
    maybe_flush_syscallbuf();
  }

  FrameTime current_time = trace_writer().time();
  if (should_dump_memory(ev, current_time)) {
    dump_process_memory(this, current_time, "rec");
  }
  if (should_checksum(ev, current_time)) {
    checksum_process_memory(this, current_time);
  }

  if (trace_writer().clear_fip_fdp()) {
    const ExtraRegisters* maybe_extra = extra_regs_fallible();
    if (maybe_extra) {
      ExtraRegisters extra_registers = *maybe_extra;
      extra_registers.clear_fip_fdp();
      set_extra_regs(extra_registers);
    }
  }

  const ExtraRegisters* extra_registers = nullptr;
  if (ev.record_regs()) {
    if (!registers) {
      registers = &regs();
    }
    if (ev.record_extra_regs()) {
      extra_registers = &extra_regs();
    }
  }

  if (ev.is_syscall_event()) {
    auto state = ev.Syscall().state;
    if (state == EXITING_SYSCALL) {
      ticks_at_last_recorded_syscall_exit = tick_count();
      ip_at_last_recorded_syscall_exit = registers->ip();
      if (ticks_at_last_recorded_syscall_exit == ticks_at_last_syscall_entry &&
          ip_at_last_recorded_syscall_exit == ip_at_last_syscall_entry) {
        // We've done processing this syscall so we can forget about the entry
        // now. This makes sure that any restarted syscalls would not be
        // treated as the same entry.
        ticks_at_last_syscall_entry = 0;
        ip_at_last_syscall_entry = nullptr;
        last_syscall_entry_recorded = false;
      }
    } else if (state == ENTERING_SYSCALL || state == ENTERING_SYSCALL_PTRACE) {
      if (tick_count() == ticks_at_last_syscall_entry &&
          registers->ip() == ip_at_last_syscall_entry) {
        // Let the process handler know that we've recorded the entry already
        last_syscall_entry_recorded = true;
      }
    }
  }

  remote_code_ptr rseq_new_ip = ip();
  bool invalid_rseq_cs = false;
  if (should_apply_rseq_abort(ev.type(), &rseq_new_ip, &invalid_rseq_cs)) {
    auto addr =
        REMOTE_PTR_FIELD(rseq_state->ptr.cast<NativeArch::rseq_t>(), rseq_cs);
    uint64_t value = 0;
    write_mem(addr, value);
    record_local(addr, &value);
  } else {
    ASSERT(this, !invalid_rseq_cs)
        << "Invalid rseq_cs found, not currently emulated properly by rr (should segfault)";
  }

  trace_writer().write_frame(this, ev, registers, extra_registers);
  LOG(debug) << "Wrote event " << ev << " for time " << current_time;

  if (rseq_new_ip != ip()) {
    Registers r = regs();
    r.set_ip(rseq_new_ip);
    set_regs(r);
  }

  if (!ev.has_ticks_slop() && reset == ALLOW_RESET_SYSCALLBUF) {
    // After we've output an event, it's safe to reset the syscallbuf (if not
    // explicitly delayed) since we will have exited the syscallbuf code that
    // consumed the syscallbuf data.
    // This only works if the event has a reliable tick count so when we
    // reach it, we're done.
    maybe_reset_syscallbuf();
  }
}

bool RecordTask::is_fatal_signal(int sig,
                                 SignalDeterministic deterministic) const {
  if (thread_group()->received_sigframe_SIGSEGV) {
    // Can't be blocked, caught or ignored
    return true;
  }

  auto action = default_action(sig);
  if (action != DUMP_CORE && action != TERMINATE) {
    // If the default action doesn't kill the process, it won't die.
    return false;
  }

  if (is_sig_ignored(sig)) {
    // Deterministic fatal signals can't be ignored.
    return deterministic == DETERMINISTIC_SIG;
  }
  // If there's a signal handler, the signal won't be fatal.
  return !signal_has_user_handler(sig);
}

void RecordTask::record_current_event() { record_event(ev()); }

pid_t RecordTask::find_newborn_thread() {
  ASSERT(this, session().is_recording());
  ASSERT(this, ptrace_event() == PTRACE_EVENT_CLONE);

  pid_t hint = get_ptrace_eventmsg_pid();
  char path[PATH_MAX];
  if (hint >= 0) {
    sprintf(path, "/proc/%d/task/%d", tid, hint);
    struct stat stat_buf;
    // This should always succeed, but may fail in old kernels due to
    // a kernel bug. See RecordSession::handle_ptrace_event.
    if (!session().find_task(hint) && 0 == stat(path, &stat_buf)) {
      return hint;
    }
  }

  sprintf(path, "/proc/%d/task", tid);
  DIR* dir = opendir(path);
  ASSERT(this, dir);
  while (true) {
    struct dirent* result = readdir(dir);
    ASSERT(this, result);
    char* end;
    pid_t thread_tid = strtol(result->d_name, &end, 10);
    if (*end == '\0' && !session().find_task(thread_tid)) {
      closedir(dir);
      return thread_tid;
    }
  }
}

pid_t RecordTask::find_newborn_process(pid_t child_parent) {
  ASSERT(this, session().is_recording());
  ASSERT(this, ptrace_event() == PTRACE_EVENT_CLONE ||
               ptrace_event() == PTRACE_EVENT_VFORK ||
               ptrace_event() == PTRACE_EVENT_FORK);

  pid_t hint = get_ptrace_eventmsg_pid();
  // This should always succeed, but may fail in old kernels due to
  // a kernel bug. See RecordSession::handle_ptrace_event.
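  // Fall back to scanning /proc for a previously unseen child of
  // |child_parent| when the eventmsg hint is unusable.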
  if (hint >= 0 && !session().find_task(hint) &&
      get_ppid(hint) == child_parent) {
    return hint;
  }

  DIR* dir = opendir("/proc");
  ASSERT(this, dir);
  while (true) {
    struct dirent* result = readdir(dir);
    ASSERT(this, result);
    char* end;
    pid_t proc_tid = strtol(result->d_name, &end, 10);
    if (*end == '\0' && !session().find_task(proc_tid) &&
        get_ppid(proc_tid) == child_parent) {
      closedir(dir);
      return proc_tid;
    }
  }
}

void RecordTask::set_tid_addr(remote_ptr<int> tid_addr) {
  LOG(debug) << "updating cleartid futex to " << tid_addr;
  tid_futex = tid_addr;
}

void RecordTask::update_own_namespace_tid() {
  AutoRemoteSyscalls remote(this);
  own_namespace_rec_tid =
      remote.infallible_syscall_if_alive(syscall_number_for_gettid(arch()));
  if (own_namespace_rec_tid == -ESRCH) {
    own_namespace_rec_tid = -1;
  }
}

void RecordTask::kill_if_alive() {
  if (!seen_ptrace_exit_event()) {
    tgkill(SIGKILL);
  }
}

pid_t RecordTask::get_parent_pid() const { return get_ppid(tid); }

void RecordTask::set_tid_and_update_serial(pid_t tid, pid_t own_namespace_tid) {
  hpc.set_tid(tid);
  this->tid = rec_tid = tid;
  serial = session().next_task_serial();
  own_namespace_rec_tid = own_namespace_tid;
}

bool RecordTask::may_reap() {
  if (emulated_stop_pending) {
    LOG(debug) << "Declining to reap " << tid << "; emulated stop pending";
    // Don't reap until the emulated ptrace stop has been processed.
    return false;
  }
  // Non thread-group-leaders may always be reaped
  if (tid != real_tgid()) {
    return true;
  }
  for (auto it : thread_group()->task_set()) {
    if (&*it != this) {
      LOG(debug) << "Declining to reap " << tid
                 << "; leader of non-empty thread-group with active thread "
                 << it->tid;
      return false;
    }
  }
  return true;
}

void RecordTask::reap() {
  ASSERT(this, !was_reaped_);
  LOG(debug) << "Reaping " << tid;
  WaitOptions options(tid);
  options.block_seconds = 0;
  WaitManager::wait_exit(options);
  was_reaped_ = true;
}

static uint64_t read_pid_ns(const RecordTask* t) {
  char buf[PATH_MAX];
  sprintf(buf, "/proc/%d/ns/pid", t->tid);
  char link[PATH_MAX];
  int ret = readlink(buf, link, sizeof(link));
  ASSERT(t, ret >= 0);
  ASSERT(t, ret < (int)sizeof(link));
  link[ret] = 0;
  ASSERT(t, strncmp(link, "pid:[", 5) == 0);
  char* end;
  uint64_t result = strtoul(link + 5, &end, 10);
  ASSERT(t, strcmp(end, "]") == 0);
  return result;
}

bool RecordTask::waiting_for_pid_namespace_tasks_to_exit() const {
  // read_pid_ns() will fail if we're reaped
  if (tg->tgid_own_namespace != 1 || was_reaped()) {
    return false;
  }
  // This might be the last live thread for pid-1 in the pid namespace.
  // Checking that it *is* the last live thread is tricky because other
  // threads could unexpectedly die asynchronously :-(.
  // See if there are any other tasks in the pid namespace.
  // Note that due to setns there can be tasks in the pid namespace
  // with parents outside the pid namespace other than our thread-group.

  // If there are multiple threads in our threadgroup, they're in our
  // pid namespace.
  if (thread_group()->task_set().size() > 1) {
    return true;
  }
  // If we have any child processes then those belong to our pid namespace
  // (or a descendant).
  for (auto p : session().thread_group_map()) {
    if (p.second->parent() == tg.get()) {
      return true;
    }
  }
  // If there are any other tasks in the pid namespace at least one must be
  // directly in the namespace.
  uint64_t pid_ns = read_pid_ns(this);
  for (auto it : session().tasks()) {
    auto rt = static_cast<RecordTask*>(it.second);
    if (rt == this) {
      continue;
    }
    if (read_pid_ns(rt) == pid_ns) {
      return true;
    }
  }
  return false;
}

// Disable chaos mode memory randomization for 32-bit ASAN builds. There isn't
// much address space to play with and it gets tricky.
bool RecordTask::enable_chaos_memory_allocations() const {
  return session().enable_chaos() &&
         (session().excluded_ranges().empty() || word_size(arch()) >= 8);
}

int RecordTask::process_depth() const {
  int depth = 0;
  ThreadGroup* tg = this->tg.get();
  while (tg) {
    ++depth;
    tg = tg->parent();
  }
  return depth;
}

template <typename Arch>
static void maybe_restore_original_syscall_registers_arch(RecordTask* t,
                                                          void* local_addr) {
  if (!local_addr) {
    return;
  }
  auto locals = reinterpret_cast<preload_thread_locals<Arch>*>(local_addr);
  static_assert(sizeof(*locals) <= PRELOAD_THREAD_LOCALS_SIZE,
                "bad PRELOAD_THREAD_LOCALS_SIZE");
  if (!locals->original_syscall_parameters) {
    return;
  }

  auto args = t->read_mem(locals->original_syscall_parameters.rptr());
  Registers r = t->regs();
  if (args.no != r.syscallno()) {
    // Maybe a preparatory syscall before the real syscall (e.g. sys_read)
    return;
  }
  r.set_arg1(args.args[0]);
  r.set_arg2(args.args[1]);
  r.set_arg3(args.args[2]);
  r.set_arg4(args.args[3]);
  r.set_arg5(args.args[4]);
  r.set_arg6(args.args[5]);
  t->set_regs(r);
}

void RecordTask::maybe_restore_original_syscall_registers() {
  RR_ARCH_FUNCTION(maybe_restore_original_syscall_registers_arch, arch(), this,
                   preload_thread_locals());
}

bool RecordTask::post_vm_clone(CloneReason reason, int flags, Task* origin) {
  if (Task::post_vm_clone(reason, flags, origin)) {
    KernelMapping preload_thread_locals_mapping =
        vm()->mapping_of(AddressSpace::preload_thread_locals_start()).map;
    auto mode = trace_writer().write_mapped_region(
        this, preload_thread_locals_mapping,
        preload_thread_locals_mapping.fake_stat(),
        preload_thread_locals_mapping.fsname(), vector<TraceRemoteFd>(),
        TraceWriter::RR_BUFFER_MAPPING);
    ASSERT(this, mode == TraceWriter::DONT_RECORD_IN_TRACE);
    return true;
  }
  return false;
}

} // namespace rr
rr-5.7.0/src/RecordTask.h000066400000000000000000000734411450675474200151760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_RECORD_TASK_H_
#define RR_RECORD_TASK_H_

#include "Registers.h"
#include "Task.h"
#include "TraceFrame.h"

namespace rr {

struct Sighandlers;

class TaskSyscallStateBase {
public:
  virtual ~TaskSyscallStateBase() {}
};

/** Different kinds of waits a task can do. */
enum WaitType {
  // Not waiting for anything
  WAIT_TYPE_NONE,
  // Waiting for any child process
  WAIT_TYPE_ANY,
  // Waiting for any child with the same process group ID
  WAIT_TYPE_SAME_PGID,
  // Waiting for any child with a specific process group ID
  WAIT_TYPE_PGID,
  // Waiting for a specific process ID
  WAIT_TYPE_PID
};

/** Reasons why we simulate stopping of a task (see ptrace(2) man page). */
enum EmulatedStopType {
  NOT_STOPPED,
  GROUP_STOP,           // stopped by a signal. This applies to non-ptracees too.
  SIGNAL_DELIVERY_STOP, // Stopped before delivering a signal. ptracees only.
  SYSCALL_ENTRY_STOP,   // Stopped at syscall entry. ptracees only
  SYSCALL_EXIT_STOP,    // Stopped at syscall exit. ptracees only
  SECCOMP_STOP,         // Stopped at seccomp stop. ptracees only
  CHILD_STOP            // All other kinds of non-ptrace stops
};

/**
 * Pass USE_SYSGOOD to emulate_ptrace_stop to add 0x80 to the signal
 * if PTRACE_O_TRACESYSGOOD is in effect.
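 * (With TRACESYSGOOD, a real ptracer observes syscall stops as
 * SIGTRAP | 0x80, letting it distinguish them from genuine SIGTRAPs.)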
 */
enum AddSysgoodFlag { IGNORE_SYSGOOD, USE_SYSGOOD };

struct SyscallbufCodeLayout {
  remote_code_ptr syscallbuf_code_start;
  remote_code_ptr syscallbuf_code_end;
  remote_code_ptr get_pc_thunks_start;
  remote_code_ptr get_pc_thunks_end;
  remote_code_ptr syscallbuf_syscall_hook;
  remote_code_ptr syscallbuf_final_exit_instruction;
};

enum SignalDisposition { SIGNAL_DEFAULT, SIGNAL_IGNORE, SIGNAL_HANDLER };

/**
 * Every Task owned by a RecordSession is a RecordTask. Functionality that
 * only applies during recording belongs here.
 */
class RecordTask final : public Task {
public:
  RecordTask(RecordSession& session, pid_t _tid, uint32_t serial,
             SupportedArch a);

  Task* clone(CloneReason reason, int flags, remote_ptr<void> stack,
              remote_ptr<void> tls, remote_ptr<int> cleartid_addr,
              pid_t new_tid, pid_t new_rec_tid, uint32_t new_serial,
              Session* other_session = nullptr,
              FdTable::shr_ptr new_fds = nullptr,
              ThreadGroup::shr_ptr new_tg = nullptr) override;
  virtual void post_wait_clone(Task* cloned_from, int flags) override;
  virtual void on_syscall_exit(int syscallno, SupportedArch arch,
                               const Registers& regs) override;
  virtual void will_resume_execution(ResumeRequest, WaitRequest, TicksRequest,
                                     int /*sig*/) override;
  virtual void did_wait() override;

  bool enable_chaos_memory_allocations() const;

  std::vector<remote_code_ptr> syscallbuf_syscall_entry_breakpoints();
  bool is_at_syscallbuf_syscall_entry_breakpoint();
  bool is_at_syscallbuf_final_instruction_breakpoint();
  bool is_at_syscallstub_exit_breakpoint();

  /**
   * Initialize tracee buffers in this, i.e., implement
   * RRCALL_init_syscall_buffer. This task must be at the point
   * of *exit from* the rrcall. Registers will be updated with
   * the return value from the rrcall, which is also returned
   * from this call.
   */
  void init_buffers();
  void post_exec();
  /**
   * Called when SYS_rrcall_init_preload has happened.
   */
  virtual void at_preload_init() override;

  RecordSession& session() const;
  TraceWriter& trace_writer() const;

  /**
   * Emulate 'tracer' ptracing this task.
   */
  void set_emulated_ptracer(RecordTask* tracer);

  /**
   * Call this when an event occurs that should stop a ptraced task.
   * If we're emulating ptrace of the task, stop the task and wake the ptracer
   * if it's waiting, and queue "status" to be reported to the
   * ptracer. If siginfo is non-null, we'll report that siginfo, otherwise
   * we'll make one up based on the status (unless the status is an exit
   * code).
   * Returns true if the task is stopped-for-emulated-ptrace, false otherwise.
   */
  bool emulate_ptrace_stop(WaitStatus status,
                           const siginfo_t* siginfo = nullptr,
                           int si_code = 0) {
    return emulate_ptrace_stop(status,
                               status.group_stop() ? GROUP_STOP
                                                   : SIGNAL_DELIVERY_STOP,
                               siginfo, si_code);
  }
  bool emulate_ptrace_stop(WaitStatus status, EmulatedStopType stop_type,
                           const siginfo_t* siginfo = nullptr,
                           int si_code = 0);

  /**
   * Force the ptrace-stop state no matter what state the task is currently
   * in.
   */
  void force_emulate_ptrace_stop(WaitStatus status, EmulatedStopType stop_type);

  /**
   * If necessary, signal the ptracer that this task has exited.
   */
  void do_ptrace_exit_stop(WaitStatus exit_status);

  /**
   * Return the exit event.
   * If write_child_tid is set, zero out child_tid now if applicable.
   */
  enum WriteChildTid {
    KERNEL_WRITES_CHILD_TID,
    WRITE_CHILD_TID,
  };
  void record_exit_event(WriteChildTid write_child_tid = KERNEL_WRITES_CHILD_TID);

  /**
   * Called when we're about to deliver a signal to this task. If it's a
If it's a * synthetic SIGCHLD and there's a ptraced task that needs a SIGCHLD, * update the siginfo to reflect the status and note that that * ptraced task has had its SIGCHLD sent. * Note that we can't set the correct siginfo when we send the signal, because * it requires us to set information only the kernel has permission to set. * Returns false if this signal should be deferred. */ bool set_siginfo_for_synthetic_SIGCHLD(siginfo_t* si); /** * Sets up |si| as if we're delivering a SIGCHLD/waitid for this waited task. */ template <typename Arch> void set_siginfo_for_waited_task(typename Arch::siginfo_t* si) { // XXX: The `ptrace` argument is likely incorrect here. emulated_stop_code.fill_siginfo(si, emulated_stop_type != GROUP_STOP, emulated_ptrace_options); si->_sifields._sigchld.si_pid_ = tgid(); si->_sifields._sigchld.si_uid_ = getuid(); } /** * Return a reference to the saved siginfo record for the stop-signal * that we're currently in a ptrace-stop for. */ siginfo_t& get_saved_ptrace_siginfo(); /** * When emulating a ptrace-continue with a signal number, extract the siginfo * that was saved by |save_ptrace_signal_siginfo|. If no such siginfo was * saved, make one up. */ siginfo_t take_ptrace_signal_siginfo(int sig); /** * Returns true if this task is in a waitpid or similar that would return * when t's status changes due to a ptrace event. */ bool is_waiting_for_ptrace(RecordTask* t); /** * Returns true if this task is in a waitpid or similar that would return * when t's status changes due to a regular event (exit). */ bool is_waiting_for(RecordTask* t); bool already_exited() const override { return waiting_for_reap; } bool is_detached_proxy() const override { return detached_proxy; } /** * Call this to force a group stop for this task with signal 'sig', * notifying ptracer if necessary. */ void apply_group_stop(int sig); /** * Call this after |sig| is delivered to this task. Emulate * sighandler updates induced by the signal delivery. */ void signal_delivered(int sig); /** * Return true if |sig| is pending but hasn't been reported to ptrace yet */ bool is_signal_pending(int sig); /** * Return true if there are any signals pending that are not blocked. */ bool has_any_actionable_signal(); /** * Get all threads out of an emulated GROUP_STOP */ void emulate_SIGCONT(); /** * Return true if the disposition of |sig| in |table| isn't * SIG_IGN or SIG_DFL, that is, if a user sighandler will be * invoked when |sig| is received. */ bool signal_has_user_handler(int sig) const; /** * If signal_has_user_handler(sig) is true, return the address of the * user handler, otherwise return null. */ remote_code_ptr get_signal_user_handler(int sig) const; /** * Return true if the signal handler for |sig| takes a siginfo_t* * parameter. */ bool signal_handler_takes_siginfo(int sig) const; /** * Return |sig|'s current sigaction. Returned as raw bytes since the * data is architecture-dependent. */ const std::vector<uint8_t>& signal_action(int sig) const; /** Return true iff |sig| is blocked for this. */ bool is_sig_blocked(int sig); /** * Return true iff |sig| is SIG_IGN, or it's SIG_DFL and the * default disposition is "ignore". */ bool is_sig_ignored(int sig) const; /** * Return the application's current disposition of |sig|. */ SignalDisposition sig_disposition(int sig) const; /** * Return the resolved disposition --- what this signal will actually do, * taking into account the default behavior.
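 *
 * For example (a sketch of the intent only, per standard POSIX defaults):
 * SIGCHLD with SIG_DFL resolves to an "ignored" disposition, while SIGSEGV
 * with SIG_DFL resolves to a fatal one.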
*/ SignalResolvedDisposition sig_resolved_disposition( int sig, SignalDeterministic deterministic); /** * Set the siginfo for the signal-stop of this. */ void set_siginfo(const siginfo_t& si); /** Note that the task sigmask needs to be refetched. */ void invalidate_sigmask() { blocked_sigs_dirty = true; } /** * Reset the signal handler for this signal to the default. */ void did_set_sig_handler_default(int sig); /** * Check that our status for |sig| matches what's in /proc/<pid>/status. */ void verify_signal_states(); /** * Stashed-signal API: if a signal becomes pending at an * awkward time, but could be handled "soon", call * |stash_sig()| to stash the current pending-signal state. * * |has_stashed_sig()| obviously returns true if |stash_sig()| * has been called successfully. * * |pop_stash_sig()| restores the (relevant) state of this * Task to what was saved in |stash_sig()|, and returns the * saved siginfo. After this call, |has_stashed_sig()| is * false. * * NB: |get_siginfo()| will always return the "real" siginfo, * regardless of stash popped-ness state. Callers must ensure * they do the right thing with the popped siginfo. * * If the process unexpectedly died (due to SIGKILL), we don't * stash anything. */ void stash_sig(); void stash_synthetic_sig(const siginfo_t& si, SignalDeterministic deterministic); bool has_stashed_sig() const { return !stashed_signals.empty(); } struct StashedSignal { StashedSignal(const siginfo_t& siginfo, SignalDeterministic deterministic, remote_code_ptr ip) : siginfo(siginfo), deterministic(deterministic), ip(ip) {} siginfo_t siginfo; SignalDeterministic deterministic; remote_code_ptr ip; }; const StashedSignal* stashed_sig_not_synthetic_SIGCHLD() const; bool has_stashed_sig(int sig) const; const StashedSignal* peek_stashed_sig_to_deliver() const; void pop_stash_sig(const StashedSignal* stashed); void stashed_signal_processed(); /** * If a group-stop occurs at an inconvenient time, stash it and * process it later. */ void stash_group_stop() { stashed_group_stop = true; } void clear_stashed_group_stop() { stashed_group_stop = false; } bool has_stashed_group_stop() const { return stashed_group_stop; } /** * Return true if the current state of this looks like the * interrupted syscall at the top of our event stack, if there * is one. */ bool is_syscall_restart(); /** * Return true iff this is at an execution state where * resuming execution may lead to the restart of an * interrupted syscall. * * For example, if a signal without a user handler is about to * be delivered to this just after a syscall interruption, * then delivering the signal may restart the first syscall * and this method will return true. */ bool at_may_restart_syscall() const; /** * Return true iff this is at an execution state where * a syscall that modifies signals was interrupted but will not * be automatically restarted. **/ bool at_interrupted_non_restartable_signal_modifying_syscall() const; /** * Return true if this is at an arm-desched-event syscall. */ bool is_arm_desched_event_syscall(); /** * Return true if this is at a disarm-desched-event syscall. */ bool is_disarm_desched_event_syscall(); /** * Return true if |t| may not be immediately runnable, * i.e., resuming execution and then |waitpid()|'ing may block * for an unbounded amount of time. When the task is in this * state, the tracer must await a |waitpid()| notification * that the task is no longer possibly-blocked before resuming * its execution.
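 *
 * A hypothetical caller pattern (sketch only, not actual rr scheduler
 * code):
 *
 *   if (t->may_be_blocked()) {
 *     // Don't resume-and-wait synchronously; await a status-change
 *     // notification for |t| first.
 *   }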
*/ bool may_be_blocked() const; /** * Returns true if it looks like this task has been spinning on an atomic * access/lock. */ bool maybe_in_spinlock(); /** * Return true if this is within the syscallbuf library. This * *does not* imply that $ip is at a buffered syscall. * This also includes the runtime stub code that runs * before entering syscallbuf but does not include the "safe area". * Returning true from this function implies that the code will execute * `_syscallbuf_final_exit_instruction` before returning to normal code. */ bool is_in_syscallbuf(); /** * Shortcut to the most recent |pending_event->desched.rec| when * there's a desched event on the stack, and nullptr otherwise. * Exists just so that clients don't need to dig around in the * event stack to find this record. */ remote_ptr<const struct syscallbuf_record> desched_rec() const; /** * Returns true when the task is in a signal handler in an interrupted * system call being handled by syscall buffering. */ bool running_inside_desched() const; /** * Returns -1 if we failed (the process unexpectedly exited). */ int get_ptrace_eventmsg_seccomp_data(); /** * Save tracee data to the trace. |addr| is the address in * the address space of this task. The |record_local*()| * variants record data that's already been read from this, * and the |record_remote*()| variants read the data and then * record it. * If 'addr' is null then no record is written. */ void record_local(remote_ptr<void> addr, ssize_t num_bytes, const void* buf); template <typename T> void record_local(remote_ptr<T> addr, const T* buf, size_t count = 1) { record_local(addr, sizeof(T) * count, buf); } void record_remote(remote_ptr<void> addr, ssize_t num_bytes); template <typename T> void record_remote(remote_ptr<T> addr) { record_remote(addr, sizeof(T)); } void record_remote(const MemoryRange& range) { record_remote(range.start(), range.size()); } ssize_t record_remote_fallible(const MemoryRange& range) { return record_remote_fallible(range.start(), range.size()); } // Record as much as we can of the bytes in this range. Will record only // contiguous mapped data starting at `addr`. ssize_t record_remote_fallible(remote_ptr<void> addr, uintptr_t num_bytes, const std::vector& holes = std::vector()); // Record as much as we can of the bytes in this range. Will record only // contiguous mapped-writable data starting at `addr`. rr mappings (e.g. syscallbuf) // are treated as non-contiguous with any other mapping. void record_remote_writable(remote_ptr<void> addr, ssize_t num_bytes); // Simple helper that attempts to use the local mapping to record if one // exists bool record_remote_by_local_map(remote_ptr<void> addr, size_t num_bytes); template <typename T> void write_and_record(remote_ptr<T> addr, const T& value, bool* ok = nullptr, uint32_t flags = 0) { write_mem(addr, value, ok, flags); record_local(addr, &value, 1); } /** * Save tracee data to the trace. |addr| is the address in * the address space of this task. * If 'addr' is null then a zero-length record is written. */ void record_remote_even_if_null(remote_ptr<void> addr, ssize_t num_bytes); template <typename T> void record_remote_even_if_null(remote_ptr<T> addr) { record_remote_even_if_null(addr, sizeof(T)); } /** * Manage pending events. |push_event()| pushes the given * event onto the top of the event stack. The |pop_*()| * helpers pop the event at top of the stack, which must be of * the specified type.
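 *
 * For example, syscall events are pushed on entry and popped on exit
 * (illustrative pairing only):
 *
 *   t->push_syscall_event(syscallno);  // at syscall entry
 *   // ... record the syscall ...
 *   t->pop_syscall();                  // at exit; top must be EV_SYSCALL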
*/ void push_event(const Event& ev) { pending_events.push_back(ev); } void push_syscall_event(int syscallno); void pop_event(EventType expected_type); void pop_noop() { pop_event(EV_NOOP); } void pop_desched() { pop_event(EV_DESCHED); } void pop_seccomp_trap() { pop_event(EV_SECCOMP_TRAP); } void pop_signal_delivery() { pop_event(EV_SIGNAL_DELIVERY); } void pop_signal_handler() { pop_event(EV_SIGNAL_HANDLER); } void pop_syscall() { pop_event(EV_SYSCALL); } void pop_syscall_interruption() { pop_event(EV_SYSCALL_INTERRUPTION); } virtual void log_pending_events() const override; /** Return the event at the top of this's stack. */ Event& ev() { return pending_events.back(); } const Event& ev() const { return pending_events.back(); } /** * Obtain the previous event on the stack (if any) or nullptr (if not) */ Event *prev_ev() { ssize_t depth = pending_events.size(); return depth > 2 ? &pending_events[depth - 2] : nullptr; } /** * Call this before recording events or data. Records * syscallbuf data and flushes the buffer, if there's buffered * data. * * The timing of calls to this is tricky. We must flush the syscallbuf * before recording any data associated with events that happened after the * buffered syscalls. But we don't support flushing a syscallbuf twice with * no intervening reset, i.e. after flushing we have to be sure we'll get * a chance to reset the syscallbuf (i.e. record some other kind of event) * before the tracee runs again in a way that might append another buffered * syscall --- so we can't flush too early. */ void maybe_flush_syscallbuf(); /** * Call this after recording an event when it might be safe to reset the * syscallbuf. It must be after recording an event to ensure during replay * we run past any syscallbuf after-syscall code that uses the buffer data. */ void maybe_reset_syscallbuf(); /** * Record an event on behalf of this. Record the registers of * this (and other relevant execution state) so that it can be * used or verified during replay, if that state is available * and meaningful at this's current execution point. * |record_current_event()| records |this->ev()|, and * |record_event()| records the specified event. */ void record_current_event(); enum FlushSyscallbuf { FLUSH_SYSCALLBUF, /* Pass this if it's safe to replay the event before we process the * syscallbuf records. */ DONT_FLUSH_SYSCALLBUF }; enum AllowSyscallbufReset { ALLOW_RESET_SYSCALLBUF, /* Pass this if it's safe to replay the event before we process the * syscallbuf records. */ DONT_RESET_SYSCALLBUF }; // Take `ev` by value to avoid bugs where we pass in an event in // `pending_events`, which could lead to dangling references when // flushing the syscallbuf manipulates `pending_events`. void record_event(Event ev, FlushSyscallbuf flush = FLUSH_SYSCALLBUF, AllowSyscallbufReset reset = ALLOW_RESET_SYSCALLBUF, const Registers* registers = nullptr); bool is_fatal_signal(int sig, SignalDeterministic deterministic) const; /** * Return the pid of the newborn thread created by this task. * Called when this task has a PTRACE_CLONE_EVENT with CLONE_THREAD. */ pid_t find_newborn_thread(); /** * Return the pid of the newborn process (whose parent has pid `parent_pid`, * which need not be the same as the current task's pid, due to CLONE_PARENT) * created by this task. Called when this task has a PTRACE_CLONE_EVENT * without CLONE_THREAD, or PTRACE_FORK_EVENT. */ pid_t find_newborn_process(pid_t child_parent); /** * If the process looks alive, kill it.
*/ void kill_if_alive(); remote_ptr<void> robust_list() const { return robust_futex_list; } size_t robust_list_len() const { return robust_futex_list_len; } /** Uses /proc so not trivially cheap. */ pid_t get_parent_pid() const; /** * Return true if this is a "clone child" per the wait(2) man page. */ bool is_clone_child() { return termination_signal != SIGCHLD; } void set_termination_signal(int sig) { termination_signal = sig; } /** * When a signal triggers an emulated ptrace-stop for this task, * save the siginfo so a later emulated ptrace-continue with this signal * number can use it. */ void save_ptrace_signal_siginfo(const siginfo_t& si); enum { SYNTHETIC_TIME_SLICE_SI_CODE = -9999 }; /** * Tasks normally can't change their tid. There is one very special situation * where they can: when a non-main-thread does an execve, its tid changes * to the tid of the thread-group leader. */ void set_tid_and_update_serial(pid_t tid, pid_t own_namespace_tid); /** * Return our cached copy of the signal mask, updating it if necessary. */ sig_set_t get_sigmask(); /** * Just get the signal mask of the process. */ sig_set_t read_sigmask_from_process(); /** * Unblock the signal for the process. */ void unblock_signal(int sig); /** * Set the signal handler to default for the process. */ void set_sig_handler_default(int sig); ~RecordTask(); void maybe_restore_original_syscall_registers(); /** * The task reached zombie state. Do whatever processing is necessary (reaping * it, emulating ptrace stops, etc.) */ void did_reach_zombie(); // Is this task a container init? (which has special signal behavior) bool is_container_init() const { return tg->tgid_own_namespace == 1; } /** * Linux requires the invariant that all members of a thread group * are reaped before the thread group leader. This determines whether or * not we're allowed to attempt reaping this thread or whether doing so * risks deadlock. */ bool may_reap(); /** * Reaps a task-exit notification, thus detaching us from the tracee. * N.B.: If may_reap is false, this risks a deadlock. */ void reap(); bool waiting_for_pid_namespace_tasks_to_exit() const; int process_depth() const; /** * Called when this task is able to receive a SIGCHLD (e.g. because * we completed delivery of a signal). Sends a new synthetic * SIGCHLD to the task if there are still tasks that need a SIGCHLD * sent for them. * May queue signals for specific tasks. */ void send_synthetic_SIGCHLD_if_necessary(); void set_sigmask(sig_set_t mask); /** * Update the futex robust list head pointer to |list| (which * is of size |len|). */ void set_robust_list(remote_ptr<void> list, size_t len) { robust_futex_list = list; robust_futex_list_len = len; } void set_stopped(bool stopped) override; private: /* Retrieve the tid of this task from the tracee and store it */ void update_own_namespace_tid(); /** * Wait for |futex| in this address space to have the value * |val|. * * WARNING: this implementation semi-busy-waits for the value * change. This must only be used in contexts where the futex * will change "soon". */ void futex_wait(remote_ptr<int> futex, int val, bool* ok); /** * Call this when SYS_sigaction is finishing with |regs|. */ void update_sigaction(const Registers& regs); template <typename Arch> void init_buffers_arch(); template <typename Arch> void on_syscall_exit_arch(int syscallno, const Registers& regs); /** Helper function for update_sigaction. */ template <typename Arch> void update_sigaction_arch(const Registers& regs); /** Update the clear-tid futex to |tid_addr|.
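 *
 * This mirrors the kernel's CLONE_CHILD_CLEARTID semantics, i.e. for a
 * tracee that did something like (sketch):
 *
 *   clone(..., flags | CLONE_CHILD_CLEARTID, ..., &child_tid);
 *   // On thread exit the kernel zeroes child_tid and FUTEX_WAKEs waiters.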
*/ void set_tid_addr(remote_ptr<int> tid_addr); virtual bool post_vm_clone(CloneReason reason, int flags, Task* origin) override; public: uint64_t scheduler_token; std::unique_ptr<TaskSyscallStateBase> syscall_state; Ticks ticks_at_last_recorded_syscall_exit; remote_code_ptr ip_at_last_recorded_syscall_exit; // Scheduler state Registers registers_at_start_of_last_timeslice; FrameTime time_at_start_of_last_timeslice; /* Task 'nice' value set by setpriority(2). We use this to drive scheduling decisions. rr's scheduler is deliberately simple and unfair; a task never runs as long as there's another runnable task with a lower nice value. */ int priority; /* Tasks with in_round_robin_queue set are in the session's * in_round_robin_queue instead of its task_priority_set. */ bool in_round_robin_queue; /* exit(), or exit_group() with one task, has been called, so * the exit can be treated as stable. */ bool stable_exit; bool detached_proxy; // ptrace emulation state // Task for which we're emulating ptrace of this task, or null RecordTask* emulated_ptracer; std::set<RecordTask*> emulated_ptrace_tracees; uintptr_t emulated_ptrace_event_msg; // Saved emulated-ptrace signals std::vector<siginfo_t> saved_ptrace_siginfos; // Code to deliver to ptracer/waiter when it waits. Note that zero can be a // valid code! Reset to zero when leaving the stop due to PTRACE_CONT etc. WaitStatus emulated_stop_code; // Always zero while no ptracer is attached. int emulated_ptrace_options; // One of PTRACE_CONT, PTRACE_SYSCALL --- or 0 if the tracee has not been // continued by its ptracer yet, or has no ptracer. int emulated_ptrace_cont_command; // true when a ptracer/waiter wait() can return |emulated_stop_code|. bool emulated_stop_pending; // true if this task needs to send a SIGCHLD to its ptracer for its // emulated ptrace stop bool emulated_ptrace_SIGCHLD_pending; // true if this task needs to send a SIGCHLD to its parent for its // emulated stop bool emulated_SIGCHLD_pending; // tracer attached via PTRACE_SEIZE bool emulated_ptrace_seized; WaitType in_wait_type; pid_t in_wait_pid; // Signal handler state // Points to the signal-handler table of this task. If this // task is a non-fork clone child, then the table will be // shared with all its "thread" siblings. Any updates made to // that shared table are immediately visible to all sibling // threads. // // fork children always get their own copies of the table. // And if this task exec()s, the table is copied and stripped // of user sighandlers (see below). */ std::shared_ptr<Sighandlers> sighandlers; // If not NOT_STOPPED, then the task is logically stopped and this is the type // of stop. EmulatedStopType emulated_stop_type; // True if the task sigmask may have changed and we need to refetch it. bool blocked_sigs_dirty; // Most accesses to this should use set_sigmask and get_sigmask to ensure // the mirroring to syscallbuf is correct. sig_set_t blocked_sigs; uint32_t syscallbuf_blocked_sigs_generation; // Syscallbuf state SyscallbufCodeLayout syscallbuf_code_layout; ScopedFd desched_fd; /* Value of hdr->num_rec_bytes when the buffer was flushed */ uint32_t flushed_num_rec_bytes; /* Nonzero after the trace recorder has flushed the * syscallbuf. When this happens, the recorder must prepare a * "reset" of the buffer, to zero the record count, at the * next available slot (taking |desched| into * consideration). */ bool flushed_syscallbuf; /* This bit is set when code wants to prevent the syscall * record buffer from being reset when it normally would be. * This bit is set by the desched code.
*/ bool delay_syscallbuf_reset_for_desched; /* This is set when code wants to prevent the syscall * record buffer from being reset when it normally would be. * This is set by the code for handling seccomp SIGSYS signals. */ bool delay_syscallbuf_reset_for_seccomp_trap; // Value to return from PR_GET_SECCOMP uint8_t prctl_seccomp_status; // Mirrored kernel state // This state agrees with kernel-internal values // Futex list passed to |set_robust_list()|. We could keep a // strong type for this list head and read it if we wanted to, // but for now we only need to remember its address / size at // the time of the most recent set_robust_list() call. remote_ptr<void> robust_futex_list; size_t robust_futex_list_len; // The memory cell the kernel will clear and notify on exit, // if our clone parent requested it. remote_ptr<int> tid_futex; // Signal delivered by the kernel when this task terminates, or zero int termination_signal; // Our value for PR_GET/SET_TSC (one of PR_TSC_ENABLED, PR_TSC_SIGSEGV). int tsc_mode; // Our value for ARCH_GET/SET_CPUID (0 -> generate SIGSEGV, 1 -> do CPUID). // Only used if session().has_cpuid_faulting(). int cpuid_mode; // The current stack of events being processed. std::vector<Event> pending_events; // Stashed signal-delivery state, ready to be delivered at // next opportunity. std::deque<StashedSignal> stashed_signals; bool stashed_signals_blocking_more_signals; bool stashed_group_stop; bool break_at_syscallbuf_traced_syscalls; bool break_at_syscallbuf_untraced_syscalls; bool break_at_syscallbuf_final_instruction; remote_code_ptr syscallstub_exit_breakpoint; // The pmc is programmed to interrupt at a value requested by the tracee, not // by rr. bool next_pmc_interrupt_is_for_user; bool did_record_robust_futex_changes; // This task is just waiting to be reaped. bool waiting_for_reap; // This task is waiting for a ptrace exit event. It should not // be manually run. bool waiting_for_ptrace_exit; // When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again. bool retry_syscall_patching; // We've sent a SIGKILL during shutdown for this task. bool sent_shutdown_kill; // Last exec system call was an execveat bool did_execveat; // Set if the tracee requested an override of the ticks request. // Used for testing. TicksRequest tick_request_override; // Set to prevent the scheduler from scheduling this tid, even // if it is otherwise considered runnable. Used for testing. bool schedule_frozen; }; } // namespace rr #endif /* RR_RECORD_TASK_H_ */ rr-5.7.0/src/Registers.cc000066400000000000000000000616111450675474200152360ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Registers.h" #include #include #include #include #include "ReplayTask.h" #include "core.h" #include "log.h" using namespace std; namespace rr { struct RegisterValue { // The name of this register. const char* name; // The offset of the register in user_regs_struct. size_t offset; // The size of the register. 0 means we cannot read it. size_t nbytes; // Mask to be applied to register values prior to comparing them. Will // typically be ((1 << (nbytes * 8)) - 1), but some registers may have special // comparison semantics.
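// For example, mask_for_nbytes(4) == 0xffffffff compares a full 4-byte
// register, while a comparison_mask of 0 (used in the tables below for
// registers like orig_eax and eflags) makes the register always compare
// equal.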
uint64_t comparison_mask; constexpr RegisterValue() : name(nullptr), offset(0), nbytes(0), comparison_mask(0) {} RegisterValue(const char* name_, size_t offset_, size_t nbytes_) : name(name_), offset(offset_), nbytes(nbytes_) { comparison_mask = mask_for_nbytes(nbytes_); } RegisterValue(const char* name_, size_t offset_, size_t nbytes_, uint64_t comparison_mask_, size_t size_override = 0) : name(name_), offset(offset_), nbytes(nbytes_), comparison_mask(comparison_mask_) { // Ensure no bits are set outside of the register's bitwidth. DEBUG_ASSERT((comparison_mask_ & ~mask_for_nbytes(nbytes_)) == 0); if (size_override > 0) { nbytes = size_override; } } // Returns a pointer to the register in |regs| represented by |offset|. // |regs| is assumed to be a pointer to the user_struct_regs for the // appropriate architecture. void* pointer_into(void* regs) { return static_cast(regs) + offset; } const void* pointer_into(const void* regs) { return static_cast(regs) + offset; } static uint64_t mask_for_nbytes(size_t nbytes) { DEBUG_ASSERT(nbytes <= sizeof(comparison_mask)); return ((nbytes == sizeof(comparison_mask)) ? uint64_t(0) : (uint64_t(1) << nbytes * 8)) - 1; } }; typedef std::pair RegisterInit; template struct RegisterTable : std::array { RegisterTable(std::initializer_list list) { for (auto& ri : list) { (*this)[ri.first] = ri.second; } } }; template struct RegisterInfo; template <> struct RegisterInfo { static bool ignore_undefined_register(GdbRegister regno) { return regno == DREG_FOSEG || regno == DREG_MXCSR; } static const size_t num_registers = DREG_NUM_LINUX_I386; typedef RegisterTable Table; static Table registers; }; template <> struct RegisterInfo { static bool ignore_undefined_register(GdbRegister regno) { return regno == DREG_64_FOSEG || regno == DREG_64_MXCSR; } static const size_t num_registers = DREG_NUM_LINUX_X86_64; typedef RegisterTable Table; static Table registers; }; template <> struct RegisterInfo { static bool ignore_undefined_register(GdbRegister) { return false; } static const size_t num_registers = DREG_NUM_LINUX_AARCH64; typedef RegisterTable Table; static Table registers; }; #define RV_ARCH(gdb_suffix, name, arch, extra_ctor_args) \ RegisterInit(DREG_##gdb_suffix, \ RegisterValue(#name, offsetof(arch::user_regs_struct, name), \ sizeof(((arch::user_regs_struct*)0)->name) \ extra_ctor_args)) #define RV_X86(gdb_suffix, name) \ RV_ARCH(gdb_suffix, name, rr::X86Arch, /* empty */) #define RV_X64(gdb_suffix, name) \ RV_ARCH(gdb_suffix, name, rr::X64Arch, /* empty */) #define COMMA , #define RV_X86_WITH_MASK(gdb_suffix, name, comparison_mask) \ RV_ARCH(gdb_suffix, name, rr::X86Arch, COMMA comparison_mask) #define RV_X64_WITH_MASK(gdb_suffix, name, comparison_mask, size) \ RV_ARCH(gdb_suffix, name, rr::X64Arch, COMMA comparison_mask COMMA size) #define RV_AARCH64(gdb_suffix, name) RV_ARCH(gdb_suffix, name, rr::ARM64Arch, /* empty */) #define RV_AARCH64_WITH_MASK(gdb_suffix, name, comparison_mask, size) \ RV_ARCH(gdb_suffix, name, rr::ARM64Arch, COMMA comparison_mask COMMA size) RegisterInfo::Table RegisterInfo::registers = { RV_X86(EAX, eax), RV_X86(ECX, ecx), RV_X86(EDX, edx), RV_X86(EBX, ebx), RV_X86(ESP, esp), RV_X86(EBP, ebp), RV_X86(ESI, esi), RV_X86(EDI, edi), RV_X86(EIP, eip), RV_X86_WITH_MASK(EFLAGS, eflags, 0), RV_X86_WITH_MASK(CS, xcs, 0), RV_X86_WITH_MASK(SS, xss, 0), RV_X86_WITH_MASK(DS, xds, 0), RV_X86_WITH_MASK(ES, xes, 0), // Mask out the RPL from the fs and gs segment selectors. 
The kernel // unconditionally sets RPL=3 on sigreturn, but if the segment index is 0, // the RPL doesn't matter, and the CPU resets the entire register to 0, // so whether or not we see this depends on whether the value round-tripped // to the CPU yet. RV_X86_WITH_MASK(FS, xfs, (uint16_t)~3), RV_X86_WITH_MASK(GS, xgs, (uint16_t)~3), // The comparison for this is handled specially elsewhere. RV_X86_WITH_MASK(ORIG_EAX, orig_eax, 0), }; RegisterInfo::Table RegisterInfo::registers = { RV_X64(RAX, rax), RV_X64(RCX, rcx), RV_X64(RDX, rdx), RV_X64(RBX, rbx), RV_X64_WITH_MASK(RSP, rsp, 0, 8), RV_X64(RBP, rbp), RV_X64(RSI, rsi), RV_X64(RDI, rdi), RV_X64(R8, r8), RV_X64(R9, r9), RV_X64(R10, r10), RV_X64(R11, r11), RV_X64(R12, r12), RV_X64(R13, r13), RV_X64(R14, r14), RV_X64(R15, r15), RV_X64(RIP, rip), RV_X64_WITH_MASK(64_EFLAGS, eflags, 0, 4), RV_X64_WITH_MASK(64_CS, cs, 0, 4), RV_X64_WITH_MASK(64_SS, ss, 0, 4), RV_X64_WITH_MASK(64_DS, ds, 0, 4), RV_X64_WITH_MASK(64_ES, es, 0, 4), RV_X64_WITH_MASK(64_FS, fs, 0xffffffffLL, 4), RV_X64_WITH_MASK(64_GS, gs, 0xffffffffLL, 4), // The comparison for this is handled specially // elsewhere. RV_X64_WITH_MASK(ORIG_RAX, orig_rax, 0, 8), RV_X64(FS_BASE, fs_base), RV_X64(GS_BASE, gs_base), }; RegisterInfo::Table RegisterInfo::registers = { RV_AARCH64(X0, x[0]), RV_AARCH64(X1, x[1]), RV_AARCH64(X2, x[2]), RV_AARCH64(X3, x[3]), RV_AARCH64(X4, x[4]), RV_AARCH64(X5, x[5]), RV_AARCH64(X6, x[6]), // Don't compare these - the kernel sometimes lies [1] about this value // [1] https://github.com/torvalds/linux/blob/d2f8825ab78e4c18686f3e1a756a30255bb00bf3/arch/arm64/kernel/ptrace.c#L1814-L1820 RV_AARCH64_WITH_MASK(X7, x[7], 0, 8), RV_AARCH64(X8, x[8]), RV_AARCH64(X9, x[9]), RV_AARCH64(X10, x[10]), RV_AARCH64(X11, x[11]), RV_AARCH64(X12, x[12]), RV_AARCH64(X13, x[13]), RV_AARCH64(X14, x[14]), RV_AARCH64(X15, x[15]), RV_AARCH64(X16, x[16]), RV_AARCH64(X17, x[17]), RV_AARCH64(X18, x[18]), RV_AARCH64(X19, x[19]), RV_AARCH64(X20, x[20]), RV_AARCH64(X21, x[21]), RV_AARCH64(X22, x[22]), RV_AARCH64(X23, x[23]), RV_AARCH64(X24, x[24]), RV_AARCH64(X25, x[25]), RV_AARCH64(X26, x[26]), RV_AARCH64(X27, x[27]), RV_AARCH64(X28, x[28]), RV_AARCH64(X29, x[29]), RV_AARCH64(X30, x[30]), RV_AARCH64(SP, sp), RV_AARCH64(PC, pc), // Mask out the single-step flag from the pstate. During replay, we may // single-step to an execution point, which could set the single-step bit // when it wasn't set during record. // // In Apple Air M2 SPSR bit 11 seems to be sometimes set leading to record/replay register // comparison errors. This seems to be a unused/undocumented bit in SPSR as per aarch64 // documentation anyways so ignore it. RV_AARCH64_WITH_MASK(CPSR, pstate, 0xffffffffLL & ~AARCH64_DBG_SPSR_SS & ~AARCH64_DBG_SPSR_11, 4), }; #undef RV_X64 #undef RV_X86 #undef RV_AARCH64 #undef RV_X64_WITH_MASK #undef RV_X86_WITH_MASK #undef RV_AARCH64_WITH_MASK #undef RV_ARCH // 32-bit format, 64-bit format for all of these. // format_index in RegisterPrinting depends on the ordering here. 
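// For example, with hex_format_leading_0x below, a 4-byte register value is
// printed via formats[0] ("0x%" PRIx32) and an 8-byte value via formats[1]
// ("0x%" PRIx64), selected through RegisterPrinting<nbytes>::format_index.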
static const char* hex_format_leading_0x[] = { "0x%" PRIx32, "0x%" PRIx64 }; // static const char* decimal_format[] = { "%" PRId32, "%" PRId64 }; template struct RegisterPrinting; template <> struct RegisterPrinting<4> { typedef uint32_t type; static const size_t format_index = 0; }; template <> struct RegisterPrinting<8> { typedef uint64_t type; static const size_t format_index = 1; }; template void print_single_register(FILE* f, const char* name, const void* register_ptr, const char* formats[]) { typename RegisterPrinting::type val; memcpy(&val, register_ptr, nbytes); if (name) { fprintf(f, "%s:", name); } else { fprintf(f, " "); } fprintf(f, formats[RegisterPrinting::format_index], val); } template void Registers::print_register_file_arch(FILE* f, const char* formats[]) const { fprintf(f, "Printing register file:\n"); const void* user_regs = &u; for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } switch (rv.nbytes) { case 8: print_single_register<8>(f, rv.name, rv.pointer_into(user_regs), formats); break; case 4: print_single_register<4>(f, rv.name, rv.pointer_into(user_regs), formats); break; default: DEBUG_ASSERT(0 && "bad register size"); } fprintf(f, "\n"); } fprintf(f, "\n"); } void Registers::print_register_file(FILE* f) const { RR_ARCH_FUNCTION(print_register_file_arch, arch(), f, hex_format_leading_0x); } template void Registers::print_register_file_for_trace_arch( FILE* f, TraceStyle style, const char* formats[]) const { const void* user_regs = &u; bool first = true; for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } if (!first) { fputc(' ', f); } first = false; const char* name = (style == Annotated ? rv.name : nullptr); switch (rv.nbytes) { case 8: print_single_register<8>(f, name, rv.pointer_into(user_regs), formats); break; case 4: print_single_register<4>(f, name, rv.pointer_into(user_regs), formats); break; default: DEBUG_ASSERT(0 && "bad register size"); } } } void Registers::print_register_file_compact(FILE* f) const { RR_ARCH_FUNCTION(print_register_file_for_trace_arch, arch(), f, Annotated, hex_format_leading_0x); } void Registers::print_register_file_for_trace_raw(FILE* f) const { fprintf(f, " %d %d %d %d %d %d %d" " %d %d %d %d", u.x86regs.eax, u.x86regs.ebx, u.x86regs.ecx, u.x86regs.edx, u.x86regs.esi, u.x86regs.edi, u.x86regs.ebp, u.x86regs.orig_eax, u.x86regs.esp, u.x86regs.eip, u.x86regs.eflags); } std::ostream& operator<<(std::ostream& stream, const Registers::Comparison& c) { if (c.store_mismatches) { bool first = true; for (const Registers::Mismatch& mismatch : c.mismatches) { if (first) { first = false; } else { stream << ", "; } stream << mismatch.register_name << " " << mismatch.val1 << " != " << mismatch.val2; } } else { stream << c.mismatch_count << " register mismatches"; } return stream; } void Registers::Comparison::add_mismatch(const char* reg_name, uint64_t val1, uint64_t val2) { ++mismatch_count; if (store_mismatches) { char buf1[40]; sprintf(buf1, "%p", reinterpret_cast(val1)); char buf2[40]; sprintf(buf2, "%p", reinterpret_cast(val2)); mismatches.push_back({reg_name, buf1, buf2}); } } template void Registers::compare_registers_core(const Registers& reg1, const Registers& reg2, Comparison& result) { for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } // Disregard registers that will trivially compare equal. if (rv.comparison_mask == 0) { continue; } // XXX correct but oddly displayed for big-endian processors. 
uint64_t val1 = 0, val2 = 0; memcpy(&val1, rv.pointer_into(®1.u), rv.nbytes); memcpy(&val2, rv.pointer_into(®2.u), rv.nbytes); if ((val1 ^ val2) & rv.comparison_mask) { result.add_mismatch(rv.name, val1, val2); } } } // A handy macro for compare_registers_arch specializations. #define REGCMP(user_regs, _reg, result) \ do { \ if (reg1.user_regs._reg != reg2.user_regs._reg) { \ result.add_mismatch(#_reg, reg1.user_regs._reg, reg2.user_regs._reg);\ } \ } while (0) #define X86_REGCMP(_reg, result) REGCMP(u.x86regs, _reg, result) #define X64_REGCMP(_reg, result) REGCMP(u.x64regs, _reg, result) // A wrapper around compare_registers_core so registers requiring special // processing can be handled via template specialization. template /* static */ void Registers::compare_registers_arch( const Registers& reg1, const Registers& reg2, Comparison& result) { // Default behavior. return compare_registers_core(reg1, reg2, result); } template <> /* static */ void Registers::compare_registers_arch( const Registers& reg1, const Registers& reg2, Comparison& result) { compare_registers_core(reg1, reg2, result); /* When the kernel is entered via an interrupt, orig_rax is set to -IRQ. We observe negative orig_eax values at SCHED events and signals and other timer interrupts. These values are only really meaningful to compare when they reflect original syscall numbers, in which case both will be positive. */ if (reg1.u.x86regs.orig_eax >= 0 && reg2.u.x86regs.orig_eax >= 0) { X86_REGCMP(orig_eax, result); } } template <> /* static */ void Registers::compare_registers_arch( const Registers& reg1, const Registers& reg2, Comparison& result) { compare_registers_core(reg1, reg2, result); // See comment in the x86 case if ((intptr_t)reg1.u.x64regs.orig_rax >= 0 && (intptr_t)reg2.u.x64regs.orig_rax >= 0) { X64_REGCMP(orig_rax, result); } } void Registers::compare_internal(const Registers& other, Comparison& result) const { DEBUG_ASSERT(arch() == other.arch()); RR_ARCH_FUNCTION(compare_registers_arch, arch(), *this, other, result); } template size_t Registers::read_register_arch(uint8_t* buf, GdbRegister regno, bool* defined) const { if (regno >= array_length(RegisterInfo::registers)) { *defined = false; return 0; } RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.nbytes == 0) { *defined = false; } else { *defined = true; memcpy(buf, rv.pointer_into(&u), rv.nbytes); } return rv.nbytes; } size_t Registers::read_register(uint8_t* buf, GdbRegister regno, bool* defined) const { RR_ARCH_FUNCTION(read_register_arch, arch(), buf, regno, defined); } template size_t Registers::read_register_by_user_offset_arch(uint8_t* buf, uintptr_t offset, bool* defined) const { for (size_t regno = 0; regno < RegisterInfo::num_registers; ++regno) { RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.offset == offset) { return read_register_arch(buf, GdbRegister(regno), defined); } } *defined = false; return 0; } size_t Registers::read_register_by_user_offset(uint8_t* buf, uintptr_t offset, bool* defined) const { RR_ARCH_FUNCTION(read_register_by_user_offset_arch, arch(), buf, offset, defined); } template bool Registers::write_register_arch(GdbRegister regno, const void* value, size_t value_size) { RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.nbytes == 0) { // TODO: can we get away with not writing these? 
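// For now: silently accept writes to registers we know about but can't
// store (e.g. DREG_FOSEG/DREG_MXCSR), and reject writes to other unknown
// registers.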
if (RegisterInfo::ignore_undefined_register(regno)) { return true; } return false; } else { DEBUG_ASSERT(value_size == rv.nbytes); memcpy(rv.pointer_into(&u), value, value_size); return true; } } bool Registers::write_register(GdbRegister regno, const void* value, size_t value_size) { RR_ARCH_FUNCTION(write_register_arch, arch(), regno, value, value_size); } template void Registers::write_register_by_user_offset_arch(uintptr_t offset, uintptr_t value) { for (size_t regno = 0; regno < RegisterInfo::num_registers; ++regno) { RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.offset == offset) { DEBUG_ASSERT(rv.nbytes <= sizeof(value)); memcpy(rv.pointer_into(&u), &value, rv.nbytes); return; } } } void Registers::write_register_by_user_offset(uintptr_t offset, uintptr_t value) { RR_ARCH_FUNCTION(write_register_by_user_offset_arch, arch(), offset, value); } // In theory it doesn't matter how 32-bit register values are sign extended // to 64 bits for PTRACE_SETREGS. However: // -- When setting up a signal handler frame, the kernel does some arithmetic // on the 64-bit SP value and validates that the result points to writeable // memory. This validation fails if SP has been sign-extended to point // outside the 32-bit address space. // -- Some kernels (e.g. 4.3.3-301.fc23.x86_64) with commit // c5c46f59e4e7c1ab244b8d38f2b61d317df90bba have a bug where if you clear // the upper 32 bits of %rax while in the kernel, syscalls may fail to // restart. So sign-extension is necessary for %eax in this case. We may as // well sign-extend %eax in all cases. typedef void (*NarrowConversion)(int32_t& r32, uint64_t& r64); template void convert_x86(X86Arch::user_regs_struct& x86, X64Arch::user_regs_struct& x64) { narrow_signed(x86.eax, x64.rax); narrow(x86.ebx, x64.rbx); narrow(x86.ecx, x64.rcx); narrow(x86.edx, x64.rdx); narrow(x86.esi, x64.rsi); narrow(x86.edi, x64.rdi); narrow(x86.esp, x64.rsp); narrow(x86.ebp, x64.rbp); narrow(x86.eip, x64.rip); narrow(x86.orig_eax, x64.orig_rax); narrow(x86.eflags, x64.eflags); narrow(x86.xcs, x64.cs); narrow(x86.xds, x64.ds); narrow(x86.xes, x64.es); narrow(x86.xfs, x64.fs); narrow(x86.xgs, x64.gs); narrow(x86.xss, x64.ss); } void to_x86_narrow(int32_t& r32, uint64_t& r64) { r32 = r64; } void from_x86_narrow(int32_t& r32, uint64_t& r64) { r64 = (uint32_t)r32; } void from_x86_narrow_signed(int32_t& r32, uint64_t& r64) { r64 = (int64_t)r32; } void Registers::set_from_ptrace(const NativeArch::user_regs_struct& ptrace_regs) { if (arch() == NativeArch::arch()) { memcpy(&u, &ptrace_regs, sizeof(ptrace_regs)); return; } DEBUG_ASSERT(arch() == x86 && NativeArch::arch() == x86_64); convert_x86( u.x86regs, *const_cast( reinterpret_cast(&ptrace_regs))); } /** * Get a user_regs_struct from these Registers. If the tracee architecture * is not rr's native architecture, then it must be a 32-bit tracee with a * 64-bit rr. In that case the user_regs_struct is 64-bit and we copy * the 32-bit register values from u.x86regs into it. 
*/ NativeArch::user_regs_struct Registers::get_ptrace() const { union { NativeArch::user_regs_struct linux_api; struct X64Arch::user_regs_struct x64arch_api; } result; if (arch() == NativeArch::arch()) { memcpy(&result, &u, sizeof(result)); return result.linux_api; } DEBUG_ASSERT(arch() == x86 && NativeArch::arch() == x86_64); memset(&result, 0, sizeof(result)); convert_x86( const_cast(this)->u.x86regs, result.x64arch_api); return result.linux_api; } iovec Registers::get_ptrace_iovec() { if (arch() == NativeArch::arch()) { iovec iov = { &u, sizeof(NativeArch::user_regs_struct) }; return iov; } DEBUG_ASSERT(arch() == x86 && NativeArch::arch() == x86_64); iovec iov = { &u.x86regs, sizeof(u.x86regs) }; return iov; } Registers::InternalData Registers::get_ptrace_for_self_arch() const { switch (arch_) { case x86: return { reinterpret_cast(&u.x86regs), sizeof(u.x86regs) }; case x86_64: return { reinterpret_cast(&u.x64regs), sizeof(u.x64regs) }; case aarch64: return { reinterpret_cast(&u.arm64regs._ptrace), sizeof(u.arm64regs._ptrace) }; default: DEBUG_ASSERT(0 && "Unknown arch"); return { nullptr, 0 }; } } Registers::InternalData Registers::get_regs_for_trace() const { switch (arch_) { case x86: return { reinterpret_cast(&u.x86regs), sizeof(u.x86regs) }; case x86_64: return { reinterpret_cast(&u.x64regs), sizeof(u.x64regs) }; case aarch64: return { reinterpret_cast(&u.arm64regs), sizeof(u.arm64regs) }; default: DEBUG_ASSERT(0 && "Unknown arch"); return { nullptr, 0 }; } } vector Registers::get_ptrace_for_arch(SupportedArch arch) const { Registers tmp_regs(arch); tmp_regs.set_from_ptrace(get_ptrace()); InternalData tmp_data = tmp_regs.get_ptrace_for_self_arch(); vector result; result.resize(tmp_data.size); memcpy(result.data(), tmp_data.data, tmp_data.size); return result; } void Registers::set_from_ptrace_for_arch(SupportedArch a, const void* data, size_t size) { if (a == NativeArch::arch()) { DEBUG_ASSERT(size == sizeof(NativeArch::user_regs_struct)); set_from_ptrace(*static_cast(data)); return; } DEBUG_ASSERT(a == x86 && NativeArch::arch() == x86_64); // We don't support a 32-bit tracee trying to set registers of a 64-bit tracee DEBUG_ASSERT(arch() == x86); DEBUG_ASSERT(size == sizeof(u.x86regs)); memcpy(&u.x86regs, data, sizeof(u.x86regs)); } void Registers::set_from_trace(SupportedArch a, const void* data, size_t size) { if (is_x86ish(a)) { return set_from_ptrace_for_arch(a, data, size); } DEBUG_ASSERT(a == aarch64); DEBUG_ASSERT(size == sizeof(u.arm64regs)); memcpy(&u.arm64regs, data, sizeof(u.arm64regs)); } bool Registers::aarch64_singlestep_flag() const { switch (arch()) { case aarch64: return pstate() & AARCH64_DBG_SPSR_SS; default: DEBUG_ASSERT(0 && "X86 only code path"); return false; } } void Registers::set_aarch64_singlestep_flag() { switch (arch()) { case aarch64: return set_pstate(pstate() | AARCH64_DBG_SPSR_SS); default: DEBUG_ASSERT(0 && "AArch64 only code path"); return; } } bool Registers::x86_singlestep_flag() const { switch (arch()) { case x86: case x86_64: return flags() & X86_TF_FLAG; default: DEBUG_ASSERT(0 && "X86 only code path"); return false; } } void Registers::clear_x86_singlestep_flag() { switch (arch()) { case x86: case x86_64: set_flags(flags() & ~X86_TF_FLAG); return; default: DEBUG_ASSERT(0 && "X86 only code path"); break; } } bool Registers::syscall_failed() const { auto result = syscall_result_signed(); return -4096 < result && result < 0; } bool Registers::syscall_may_restart() const { switch (-syscall_result_signed()) { case ERESTART_RESTARTBLOCK: case 
ERESTARTNOINTR: case ERESTARTNOHAND: case ERESTARTSYS: return true; default: return false; } } ostream& operator<<(ostream& stream, const Registers& r) { stream << "{ ip:" << HEX(r.ip().register_value()) << " args:(" << HEX(r.arg1()) << "," << HEX(r.arg2()) << "," << HEX(r.arg3()) << "," << HEX(r.arg4()) << "," << HEX(r.arg5()) << "," << r.arg6() << ") orig_syscall: " << r.original_syscallno() << " syscallno: " << r.syscallno(); if (r.arch() == aarch64) { stream << " orig_arg1: " << HEX(r.orig_arg1()); } stream << " }"; return stream; } void Registers::emulate_syscall_entry() { set_original_syscallno(syscallno()); set_orig_arg1(arg1()); /** * The aarch64 kernel has a quirk where if the syscallno is -1 (and only -1), * it will apply the -ENOSYS result before any ptrace entry stop. * On x86, this happens unconditionally for every syscall, but there the * result isn't shared with arg1, and we usually don't care because we have * access to original_syscallno. */ if (is_x86ish(arch()) || (arch() == aarch64 && syscallno() == -1)) { set_syscall_result(-ENOSYS); } } } // namespace rr rr-5.7.0/src/Registers.h000066400000000000000000000543011450675474200150760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REGISTERS_H_ #define RR_REGISTERS_H_ #include #include #include #include #include #include #include "GdbRegister.h" #include "core.h" #include "kernel_abi.h" #include "remote_code_ptr.h" #include "remote_ptr.h" struct iovec; namespace rr { class ReplayTask; const uintptr_t X86_RESERVED_FLAG = 1 << 1; const uintptr_t X86_ZF_FLAG = 1 << 6; const uintptr_t X86_TF_FLAG = 1 << 8; const uintptr_t X86_IF_FLAG = 1 << 9; const uintptr_t X86_DF_FLAG = 1 << 10; const uintptr_t X86_RF_FLAG = 1 << 16; const uintptr_t X86_ID_FLAG = 1 << 21; const uintptr_t AARCH64_DBG_SPSR_SS = 1 << 21; const uintptr_t AARCH64_DBG_SPSR_11 = 1 << 11; /** * A Registers object contains values for all general-purpose registers. * These must include all registers used to pass syscall parameters and return * syscall results. * * When reading register values, be sure to cast the result to the correct * type according to the kernel docs. E.g. int values should be cast * to int explicitly (or implicitly, by assigning to an int-typed variable), * size_t should be cast to size_t, etc. If the type is signed, call the * _signed getter. This ensures that when building rr 64-bit we will use the * right number of register bits whether the tracee is 32-bit or 64-bit, and * get sign-extension right. * * We have different register sets for different architectures. To ensure a * trace can be dumped/processed by an rr build on any platform, we allow * Registers to contain registers for any architecture. So we store them * in a union of Arch::user_regs_structs for each known Arch. */ class Registers { public: enum { MAX_SIZE = 16 }; Registers(SupportedArch a = SupportedArch(-1)) : arch_(a) { memset(&u, 0, sizeof(u)); } SupportedArch arch() const { return arch_; } void set_arch(SupportedArch a) { arch_ = a; } /** * Copy a user_regs_struct into these Registers. If the tracee architecture * is not rr's native architecture, then it must be a 32-bit tracee with a * 64-bit rr. In that case the user_regs_struct is 64-bit and we extract * the 32-bit register values from it into u.x86regs. * It's invalid to call this when the Registers' arch is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). 
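 *
 * Sketch of the intended use (assumes a 64-bit rr build tracing a 32-bit
 * tracee; `raw` would be filled by the caller, e.g. via PTRACE_GETREGS):
 *
 *   NativeArch::user_regs_struct raw; // filled in elsewhere
 *   Registers regs(x86);
 *   regs.set_from_ptrace(raw);  // extracts 32-bit values into u.x86regs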
*/ void set_from_ptrace(const NativeArch::user_regs_struct& ptrace_regs); /** * Get a user_regs_struct from these Registers. If the tracee architecture * is not rr's native architecture, then it must be a 32-bit tracee with a * 64-bit rr. In that case the user_regs_struct is 64-bit and we copy * the 32-bit register values from u.x86regs into it. * It's invalid to call this when the Registers' arch is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ NativeArch::user_regs_struct get_ptrace() const; iovec get_ptrace_iovec(); /** * Get a user_regs_struct for a particular Arch from these Registers. * It's invalid to call this when 'arch' is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ std::vector get_ptrace_for_arch(SupportedArch arch) const; struct InternalData { const uint8_t* data; size_t size; }; /** * Get the register content to save in the trace. */ InternalData get_regs_for_trace() const; /** * Equivalent to get_ptrace_for_arch(arch()) but doesn't copy. */ InternalData get_ptrace_for_self_arch() const; /** * Copy an arch-specific user_regs_struct into these Registers. * It's invalid to call this when 'arch' is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ void set_from_ptrace_for_arch(SupportedArch arch, const void* data, size_t size); /** * Copy from the arch-specific structure returned in get_regs_for_trace() * back into *this */ void set_from_trace(SupportedArch arch, const void* data, size_t size); #define ARCH_SWITCH_CASE(rettype, x86case, x64case, arm64case) \ (([=](void) -> rettype { \ switch (arch()) { \ default: \ DEBUG_ASSERT(0 && "unknown architecture"); \ RR_FALLTHROUGH; /* Fall through to avoid warnings */ \ case x86: { \ x86case; \ break; \ } \ case x86_64: { \ x64case; \ break; \ } \ case aarch64: { \ arm64case; \ break; \ } \ } \ })()) #define RR_GET_REG(x86case, x64case, arm64case) \ ARCH_SWITCH_CASE(uint64_t, \ return (uint32_t)u.x86regs.x86case, \ return u.x64regs.x64case, \ return u.arm64regs.arm64case) #define RR_GET_REG_SIGNED(x86case, x64case, arm64case) \ ARCH_SWITCH_CASE(int64_t, \ return (int32_t)u.x86regs.x86case, \ return u.x64regs.x64case, \ return u.arm64regs.arm64case) #define RR_GET_REG_X86(x86case, x64case) \ ARCH_SWITCH_CASE(uint64_t, \ return (uint32_t)u.x86regs.x86case, \ return u.x64regs.x64case, \ DEBUG_ASSERT(0 && "Hit an x86-only case, but this is not x86"); return 0) #define RR_UPDATE_CHECK(loc, value) bool changed = (uintptr_t)loc != (uintptr_t)(value); \ loc = (value); \ return changed; #define RR_SET_REG(x86case, x64case, arm64case, value) \ ARCH_SWITCH_CASE(bool, \ RR_UPDATE_CHECK(u.x86regs.x86case, value), \ RR_UPDATE_CHECK(u.x64regs.x64case, value), \ RR_UPDATE_CHECK(u.arm64regs.arm64case, value)) #define RR_SET_REG_X86(x86case, x64case, value) \ ARCH_SWITCH_CASE(bool, \ RR_UPDATE_CHECK(u.x86regs.x86case, value), \ RR_UPDATE_CHECK(u.x64regs.x64case, value), \ DEBUG_ASSERT(0 && "Hit an x86-only case, but this is not x86"); return false) remote_code_ptr ip() const { return RR_GET_REG(eip, rip, pc); } bool set_ip(remote_code_ptr addr) { return RR_SET_REG(eip, rip, pc, addr.register_value()); } remote_ptr sp() const { return RR_GET_REG(esp, rsp, sp); } bool set_sp(remote_ptr addr) { return RR_SET_REG(esp, rsp, sp, addr.as_int()); } // Access the registers holding system-call numbers, results, and // 
parameters. intptr_t syscallno() const { return (int)RR_GET_REG(eax, rax, x[8]); } bool set_syscallno(intptr_t syscallno) { return RR_SET_REG(eax, rax, x[8], syscallno); } /** * This pseudo-register holds the system-call number when we get ptrace * enter-system-call and exit-system-call events. Setting it changes * the system-call executed when resuming after an enter-system-call * event. */ intptr_t original_syscallno() const { return RR_GET_REG_SIGNED(orig_eax, orig_rax, orig_syscall); } bool set_original_syscallno(intptr_t syscallno) { return RR_SET_REG(orig_eax, orig_rax, orig_syscall, syscallno); } #define SYSCALL_REGISTER(name, x86case, x64case, arm64case) \ uintptr_t name() const { return RR_GET_REG(x86case, x64case, arm64case); } \ intptr_t name ## _signed() const { \ return RR_GET_REG_SIGNED(x86case, x64case, arm64case); \ } \ bool set_ ## name(uintptr_t value) { \ return RR_SET_REG(x86case, x64case, arm64case, value); \ } \ template bool set_ ## name(remote_ptr value) { \ return RR_SET_REG(x86case, x64case, arm64case, value.as_int()); \ } SYSCALL_REGISTER(syscall_result, eax, rax, x[0]); SYSCALL_REGISTER(orig_arg1, ebx, rdi, orig_x0) SYSCALL_REGISTER(arg1, ebx, rdi, x[0]) SYSCALL_REGISTER(arg2, ecx, rsi, x[1]) SYSCALL_REGISTER(arg3, edx, rdx, x[2]) SYSCALL_REGISTER(arg4, esi, r10, x[3]) SYSCALL_REGISTER(arg5, edi, r8, x[4]) SYSCALL_REGISTER(arg6, ebp, r9, x[5]) uintptr_t arg(int index) const { switch (index) { case 1: return arg1(); case 2: return arg2(); case 3: return arg3(); case 4: return arg4(); case 5: return arg5(); case 6: return arg6(); default: DEBUG_ASSERT(0 && "Argument index out of range"); return 0; } } /** * Set the register containing syscall argument |Index| to * |value|. */ template bool set_arg(std::nullptr_t) { return set_arg(Index, 0); } template bool set_arg(remote_ptr value) { return set_arg(Index, value.as_int()); } template bool set_arg(T value) { return set_arg(Index, uintptr_t(value)); } bool set_arg(int index, uintptr_t value) { switch (index) { case 1: return set_arg1(value); case 2: return set_arg2(value); case 3: return set_arg3(value); case 4: return set_arg4(value); case 5: return set_arg5(value); case 6: return set_arg6(value); default: DEBUG_ASSERT(0 && "Argument index out of range"); return false; } } bool set_orig_arg(int index, uintptr_t value) { switch (index) { case 1: return set_orig_arg1(value); case 2: return set_arg2(value); case 3: return set_arg3(value); case 4: return set_arg4(value); case 5: return set_arg5(value); case 6: return set_arg6(value); default: DEBUG_ASSERT(0 && "Argument index out of range"); return false; } } /** * Returns true if syscall_result() indicates failure. */ bool syscall_failed() const; /** * Returns true if syscall_result() indicates a syscall restart. */ bool syscall_may_restart() const; // Some X86-specific stuff follows. Use of these accessors should be guarded // by an architecture test. /** * Set the output registers of the |rdtsc| instruction. 
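 *
 * For example, emulating an rdtsc result of 0x0000000200000001 (sketch):
 *
 *   regs.set_rdtsc_output(0x0000000200000001ULL);
 *   // eax/rax <- 0x00000001 (low 32 bits), edx/rdx <- 0x00000002 (high)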
*/ void set_rdtsc_output(uint64_t value) { RR_SET_REG_X86(eax, rax, value & 0xffffffff); RR_SET_REG_X86(edx, rdx, value >> 32); } void set_cpuid_output(uint32_t eax, uint32_t ebx, uint32_t ecx, uint32_t edx) { RR_SET_REG_X86(eax, rax, eax); RR_SET_REG_X86(ebx, rbx, ebx); RR_SET_REG_X86(ecx, rcx, ecx); RR_SET_REG_X86(edx, rdx, edx); } bool set_r8(uintptr_t value) { DEBUG_ASSERT(arch() == x86_64); RR_UPDATE_CHECK(u.x64regs.r8, value); } bool set_r9(uintptr_t value) { DEBUG_ASSERT(arch() == x86_64); RR_UPDATE_CHECK(u.x64regs.r9, value); } bool set_r10(uintptr_t value) { DEBUG_ASSERT(arch() == x86_64); RR_UPDATE_CHECK(u.x64regs.r10, value); } bool set_r11(uintptr_t value) { DEBUG_ASSERT(arch() == x86_64); RR_UPDATE_CHECK(u.x64regs.r11, value); } uintptr_t di() const { return RR_GET_REG_X86(edi, rdi); } bool set_di(uintptr_t value) { return RR_SET_REG_X86(edi, rdi, value); } uintptr_t si() const { return RR_GET_REG_X86(esi, rsi); } bool set_si(uintptr_t value) { return RR_SET_REG_X86(esi, rsi, value); } uintptr_t cx() const { return RR_GET_REG_X86(ecx, rcx); } bool set_cx(uintptr_t value) { return RR_SET_REG_X86(ecx, rcx, value); } uintptr_t ax() const { return RR_GET_REG_X86(eax, rax); } bool set_ax(uintptr_t value) { return RR_SET_REG_X86(eax, rax, value); } uintptr_t dx() const { return RR_GET_REG_X86(edx, rdx); } bool set_dx(uintptr_t value) { return RR_SET_REG_X86(edx, rdx, value); } uintptr_t bp() const { return RR_GET_REG_X86(ebp, rbp); } uintptr_t flags() const { return RR_GET_REG_X86(eflags, eflags); }; bool set_flags(uintptr_t value) { return RR_SET_REG_X86(eflags, eflags, value); } bool zf_flag() const { return flags() & X86_ZF_FLAG; } bool df_flag() const { return flags() & X86_DF_FLAG; } uintptr_t fs_base() const { DEBUG_ASSERT(arch() == x86_64); return u.x64regs.fs_base; } uintptr_t gs_base() const { DEBUG_ASSERT(arch() == x86_64); return u.x64regs.gs_base; } void set_fs_base(uintptr_t fs_base) { DEBUG_ASSERT(arch() == x86_64); u.x64regs.fs_base = fs_base; } void set_gs_base(uintptr_t gs_base) { DEBUG_ASSERT(arch() == x86_64); u.x64regs.gs_base = gs_base; } uint64_t cs() const { return RR_GET_REG_X86(xcs, cs); } uint64_t ss() const { return RR_GET_REG_X86(xss, ss); } uint64_t ds() const { return RR_GET_REG_X86(xds, ds); } uint64_t es() const { return RR_GET_REG_X86(xes, es); } uint64_t fs() const { return RR_GET_REG_X86(xfs, fs); } uint64_t gs() const { return RR_GET_REG_X86(xgs, gs); } // End of X86-specific stuff // Begin aarch64 specific accessors uintptr_t pstate() const { DEBUG_ASSERT(arch() == aarch64); return u.arm64regs.pstate; } void set_pstate(uintptr_t pstate) { DEBUG_ASSERT(arch() == aarch64); u.arm64regs.pstate = pstate; } void set_x7(uintptr_t x7) { DEBUG_ASSERT(arch() == aarch64); u.arm64regs.x[7] = x7; } void set_x15(uintptr_t x15) { DEBUG_ASSERT(arch() == aarch64); u.arm64regs.x[15] = x15; } void set_xlr(uintptr_t xlr) { DEBUG_ASSERT(arch() == aarch64); u.arm64regs.x[30] = xlr; } uintptr_t x1() const { DEBUG_ASSERT(arch() == aarch64); return u.arm64regs.x[1]; } uintptr_t x7() const { DEBUG_ASSERT(arch() == aarch64); return u.arm64regs.x[7]; } uintptr_t xlr() const { DEBUG_ASSERT(arch() == aarch64); return u.arm64regs.x[30]; } // End of aarch64 specific accessors /** * Modify the processor's single step flag. On x86 this is the TF flag in the * eflags register. */ bool x86_singlestep_flag() const; void clear_x86_singlestep_flag(); /** * Aarch64 has two flags that control single stepping. 
  void print_register_file(FILE* f) const;
  void print_register_file_compact(FILE* f) const;
  void print_register_file_for_trace_raw(FILE* f) const;

  struct Mismatch {
    std::string register_name;
    std::string val1;
    std::string val2;
  };
  struct Comparison {
    std::vector<Mismatch> mismatches;
    int mismatch_count = 0;
    bool store_mismatches = true;

    void add_mismatch(const char* reg_name, uint64_t val1, uint64_t val2);
  };

  // This is cheap when there are no mismatches. It can be a bit expensive
  // (allocation) when mismatches are expected; call matches() instead in
  // that case, which doesn't allocate.
  Comparison compare_with(const Registers& other) const {
    Comparison result;
    compare_internal(other, result);
    return result;
  }
  bool matches(const Registers& other) const {
    Comparison result;
    result.store_mismatches = false;
    compare_internal(other, result);
    return !result.mismatch_count;
  }

  // TODO: refactor me to use the GdbRegisterValue helper from
  // GdbConnection.h.

  /**
   * Write the value for register |regno| into |buf|, which should
   * be large enough to hold any register supported by the target.
   * Return the size of the register in bytes and set |defined| to
   * indicate whether a useful value has been written to |buf|.
   */
  size_t read_register(uint8_t* buf, GdbRegister regno, bool* defined) const;

  /**
   * Write the value for register |offset| into |buf|, which should
   * be large enough to hold any register supported by the target.
   * Return the size of the register in bytes and set |defined| to
   * indicate whether a useful value has been written to |buf|.
   * |offset| is the offset of the register within a user_regs_struct.
   */
  size_t read_register_by_user_offset(uint8_t* buf, uintptr_t offset,
                                      bool* defined) const;

  /**
   * Update the register named |reg_name| to |value| with
   * |value_size| number of bytes.
   */
  bool write_register(GdbRegister reg_name, const void* value,
                      size_t value_size);

  /**
   * Update the register at user offset |offset| to |value|, taking the low
   * bytes if necessary.
   */
  void write_register_by_user_offset(uintptr_t offset, uintptr_t value);
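  // Illustrative sketch (not from the original header): matches() is the
  // cheap, allocation-free check; compare_with() is for when the mismatch
  // details will actually be reported.
  //
  //   if (!regs.matches(expected)) {
  //     Registers::Comparison c = regs.compare_with(expected);
  //     for (const auto& m : c.mismatches) {
  //       fprintf(stderr, "%s: %s vs %s\n", m.register_name.c_str(),
  //               m.val1.c_str(), m.val2.c_str());
  //     }
  //   }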
  bool operator==(const Registers& other) const {
    if (arch() != other.arch()) {
      return false;
    }
    switch (arch()) {
      case x86:
        return memcmp(&u.x86regs, &other.u.x86regs, sizeof(u.x86regs)) == 0;
      case x86_64:
        return memcmp(&u.x64regs, &other.u.x64regs, sizeof(u.x64regs)) == 0;
      case aarch64:
        return memcmp(&u.arm64regs, &other.u.arm64regs,
                      sizeof(u.arm64regs)) == 0;
      default:
        DEBUG_ASSERT(0 && "Unknown architecture");
        return false;
    }
  }
  bool operator!=(const Registers& other) const { return !(*this == other); }

  void emulate_syscall_entry();

private:
  template <typename Arch>
  void print_register_file_arch(FILE* f, const char* formats[]) const;

  enum TraceStyle {
    Annotated,
    Raw,
  };

  template <typename Arch>
  void print_register_file_for_trace_arch(FILE* f, TraceStyle style,
                                          const char* formats[]) const;

  template <typename Arch>
  static void compare_registers_core(const Registers& reg1,
                                     const Registers& reg2,
                                     Comparison& result);

  template <typename Arch>
  static void compare_registers_arch(const Registers& reg1,
                                     const Registers& reg2,
                                     Comparison& result);

  void compare_internal(const Registers& other, Comparison& result) const;

  template <typename Arch>
  size_t read_register_arch(uint8_t* buf, GdbRegister regno,
                            bool* defined) const;

  template <typename Arch>
  size_t read_register_by_user_offset_arch(uint8_t* buf, uintptr_t offset,
                                           bool* defined) const;

  template <typename Arch>
  bool write_register_arch(GdbRegister regno, const void* value,
                           size_t value_size);

  template <typename Arch>
  void write_register_by_user_offset_arch(uintptr_t offset, uintptr_t value);

  template <typename Arch> size_t total_registers_arch() const;

  SupportedArch arch_;
  union {
    rr::X86Arch::user_regs_struct x86regs;
    rr::X64Arch::user_regs_struct x64regs;
    struct {
      // This is the NT_PRSTATUS regset
      union {
        rr::ARM64Arch::user_regs_struct _ptrace;
        // This duplicates the field names of the user_regs_struct and makes
        // them available as fields of arm64regs for easy access.
        struct {
          uint64_t x[31];
          uint64_t sp;
          uint64_t pc;
          uint64_t pstate;
        };
      };
      // This is not exposed through GETREGSET. We track it manually.
      uint64_t orig_x0;
      // This is the NT_ARM_SYSTEM_CALL regset
      int orig_syscall;
    } arm64regs;
  } u;
};

template <typename ret, typename callback>
ret with_converted_registers(const Registers& regs, SupportedArch arch,
                             callback f) {
  if (regs.arch() != arch) {
    // If this is a cross-architecture syscall, first convert the registers.
    Registers converted_regs(arch);
    std::vector<uint8_t> data = regs.get_ptrace_for_arch(arch);
    converted_regs.set_from_ptrace_for_arch(arch, data.data(), data.size());
    return f(converted_regs);
  }
  return f(regs);
}
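// Illustrative sketch (assumed usage, not code from this header): running
// arch-generic logic over registers that may first need conversion, e.g.
// when a 64-bit rr build inspects a 32-bit tracee's syscall.
//
//   uintptr_t a1 = with_converted_registers<uintptr_t>(
//       regs, x86, [](const Registers& r) { return r.arg1(); });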
std::ostream& operator<<(std::ostream& stream, const Registers& r);
std::ostream& operator<<(std::ostream& stream,
                         const Registers::Comparison& c);

} // namespace rr

#endif /* RR_REGISTERS_H_ */
rr-5.7.0/src/ReplayCommand.cc000066400000000000000000000513051450675474200160210ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "ReplayCommand.h"

#include
#include
#include
#include
#include

#include "Command.h"
#include "Flags.h"
#include "GdbServer.h"
#include "ReplaySession.h"
#include "ScopedFd.h"
#include "WaitManager.h"
#include "core.h"
#include "kernel_metadata.h"
#include "log.h"
#include "main.h"

using namespace std;

namespace rr {

ReplayCommand ReplayCommand::singleton(
    "replay",
    " rr replay [OPTION]... [<trace-dir>] [-- <debugger-args>]\n"
    "  -a, --autopilot            replay without debugger server\n"
    "  -f, --onfork=<PID>         start a debug server when <PID> has been\n"
    "                             fork()d, AND the target event has been\n"
    "                             reached.\n"
    "  -g, --goto=<EVENT-NUM>     start a debug server on reaching "
    "<EVENT-NUM>\n"
    "                             in the trace.  See -M in the general "
    "options.\n"
    "  -e, --goto-exit            start a debug server at the end of the "
    "recording\n"
    "                             or the end of the specified process.\n"
    "  -o, --debugger-option=